Repository: blevesearch/bleve
Branch: master
Commit: 6cdc0b4809a5
Files: 759
Total size: 4.5 MB
Directory structure:
gitextract_lgtz3phf/
├── .github/
│ └── workflows/
│ ├── cover.yml
│ └── tests.yml
├── .gitignore
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── SECURITY.md
├── analysis/
│ ├── analyzer/
│ │ ├── custom/
│ │ │ └── custom.go
│ │ ├── keyword/
│ │ │ └── keyword.go
│ │ ├── simple/
│ │ │ └── simple.go
│ │ ├── standard/
│ │ │ └── standard.go
│ │ └── web/
│ │ └── web.go
│ ├── benchmark_test.go
│ ├── char/
│ │ ├── asciifolding/
│ │ │ ├── asciifolding.go
│ │ │ └── asciifolding_test.go
│ │ ├── html/
│ │ │ └── html.go
│ │ ├── regexp/
│ │ │ ├── regexp.go
│ │ │ └── regexp_test.go
│ │ └── zerowidthnonjoiner/
│ │ └── zerowidthnonjoiner.go
│ ├── datetime/
│ │ ├── flexible/
│ │ │ ├── flexible.go
│ │ │ └── flexible_test.go
│ │ ├── iso/
│ │ │ ├── iso.go
│ │ │ └── iso_test.go
│ │ ├── optional/
│ │ │ └── optional.go
│ │ ├── percent/
│ │ │ ├── percent.go
│ │ │ └── percent_test.go
│ │ ├── sanitized/
│ │ │ ├── sanitized.go
│ │ │ └── sanitized_test.go
│ │ └── timestamp/
│ │ ├── microseconds/
│ │ │ └── microseconds.go
│ │ ├── milliseconds/
│ │ │ └── milliseconds.go
│ │ ├── nanoseconds/
│ │ │ └── nanoseconds.go
│ │ └── seconds/
│ │ └── seconds.go
│ ├── freq.go
│ ├── freq_test.go
│ ├── lang/
│ │ ├── ar/
│ │ │ ├── analyzer_ar.go
│ │ │ ├── analyzer_ar_test.go
│ │ │ ├── arabic_normalize.go
│ │ │ ├── arabic_normalize_test.go
│ │ │ ├── stemmer_ar.go
│ │ │ ├── stemmer_ar_test.go
│ │ │ ├── stop_filter_ar.go
│ │ │ └── stop_words_ar.go
│ │ ├── bg/
│ │ │ ├── stop_filter_bg.go
│ │ │ └── stop_words_bg.go
│ │ ├── ca/
│ │ │ ├── articles_ca.go
│ │ │ ├── elision_ca.go
│ │ │ ├── elision_ca_test.go
│ │ │ ├── stop_filter_ca.go
│ │ │ └── stop_words_ca.go
│ │ ├── cjk/
│ │ │ ├── analyzer_cjk.go
│ │ │ ├── analyzer_cjk_test.go
│ │ │ ├── cjk_bigram.go
│ │ │ ├── cjk_bigram_test.go
│ │ │ ├── cjk_width.go
│ │ │ └── cjk_width_test.go
│ │ ├── ckb/
│ │ │ ├── analyzer_ckb.go
│ │ │ ├── analyzer_ckb_test.go
│ │ │ ├── sorani_normalize.go
│ │ │ ├── sorani_normalize_test.go
│ │ │ ├── sorani_stemmer_filter.go
│ │ │ ├── sorani_stemmer_filter_test.go
│ │ │ ├── stop_filter_ckb.go
│ │ │ └── stop_words_ckb.go
│ │ ├── cs/
│ │ │ ├── stop_filter_cs.go
│ │ │ └── stop_words_cs.go
│ │ ├── da/
│ │ │ ├── analyzer_da.go
│ │ │ ├── analyzer_da_test.go
│ │ │ ├── stemmer_da.go
│ │ │ ├── stop_filter_da.go
│ │ │ └── stop_words_da.go
│ │ ├── de/
│ │ │ ├── analyzer_de.go
│ │ │ ├── analyzer_de_test.go
│ │ │ ├── german_normalize.go
│ │ │ ├── german_normalize_test.go
│ │ │ ├── light_stemmer_de.go
│ │ │ ├── stemmer_de_snowball.go
│ │ │ ├── stemmer_de_test.go
│ │ │ ├── stop_filter_de.go
│ │ │ └── stop_words_de.go
│ │ ├── el/
│ │ │ ├── stop_filter_el.go
│ │ │ └── stop_words_el.go
│ │ ├── en/
│ │ │ ├── analyzer_en.go
│ │ │ ├── analyzer_en_test.go
│ │ │ ├── plural_stemmer.go
│ │ │ ├── plural_stemmer_test.go
│ │ │ ├── possessive_filter_en.go
│ │ │ ├── possessive_filter_en_test.go
│ │ │ ├── stemmer_en_snowball.go
│ │ │ ├── stemmer_en_test.go
│ │ │ ├── stop_filter_en.go
│ │ │ └── stop_words_en.go
│ │ ├── es/
│ │ │ ├── analyzer_es.go
│ │ │ ├── analyzer_es_test.go
│ │ │ ├── light_stemmer_es.go
│ │ │ ├── spanish_normalize.go
│ │ │ ├── spanish_normalize_test.go
│ │ │ ├── stemmer_es_snowball.go
│ │ │ ├── stemmer_es_snowball_test.go
│ │ │ ├── stop_filter_es.go
│ │ │ └── stop_words_es.go
│ │ ├── eu/
│ │ │ ├── stop_filter_eu.go
│ │ │ └── stop_words_eu.go
│ │ ├── fa/
│ │ │ ├── analyzer_fa.go
│ │ │ ├── analyzer_fa_test.go
│ │ │ ├── persian_normalize.go
│ │ │ ├── persian_normalize_test.go
│ │ │ ├── stop_filter_fa.go
│ │ │ └── stop_words_fa.go
│ │ ├── fi/
│ │ │ ├── analyzer_fi.go
│ │ │ ├── analyzer_fi_test.go
│ │ │ ├── stemmer_fi.go
│ │ │ ├── stop_filter_fi.go
│ │ │ └── stop_words_fi.go
│ │ ├── fr/
│ │ │ ├── analyzer_fr.go
│ │ │ ├── analyzer_fr_test.go
│ │ │ ├── articles_fr.go
│ │ │ ├── elision_fr.go
│ │ │ ├── elision_fr_test.go
│ │ │ ├── light_stemmer_fr.go
│ │ │ ├── light_stemmer_fr_test.go
│ │ │ ├── minimal_stemmer_fr.go
│ │ │ ├── minimal_stemmer_fr_test.go
│ │ │ ├── stemmer_fr_snowball.go
│ │ │ ├── stemmer_fr_snowball_test.go
│ │ │ ├── stop_filter_fr.go
│ │ │ └── stop_words_fr.go
│ │ ├── ga/
│ │ │ ├── articles_ga.go
│ │ │ ├── elision_ga.go
│ │ │ ├── elision_ga_test.go
│ │ │ ├── stop_filter_ga.go
│ │ │ └── stop_words_ga.go
│ │ ├── gl/
│ │ │ ├── stop_filter_gl.go
│ │ │ └── stop_words_gl.go
│ │ ├── hi/
│ │ │ ├── analyzer_hi.go
│ │ │ ├── analyzer_hi_test.go
│ │ │ ├── hindi_normalize.go
│ │ │ ├── hindi_normalize_test.go
│ │ │ ├── hindi_stemmer_filter.go
│ │ │ ├── hindi_stemmer_filter_test.go
│ │ │ ├── stop_filter_hi.go
│ │ │ └── stop_words_hi.go
│ │ ├── hr/
│ │ │ ├── analyzer_hr.go
│ │ │ ├── analyzer_hr_test.go
│ │ │ ├── stemmer_hr.go
│ │ │ ├── stop_filter_hr.go
│ │ │ ├── stop_words_hr.go
│ │ │ └── suffix_transformation_hr.go
│ │ ├── hu/
│ │ │ ├── analyzer_hu.go
│ │ │ ├── analyzer_hu_test.go
│ │ │ ├── stemmer_hu.go
│ │ │ ├── stop_filter_hu.go
│ │ │ └── stop_words_hu.go
│ │ ├── hy/
│ │ │ ├── stop_filter_hy.go
│ │ │ └── stop_words_hy.go
│ │ ├── id/
│ │ │ ├── stop_filter_id.go
│ │ │ └── stop_words_id.go
│ │ ├── in/
│ │ │ ├── indic_normalize.go
│ │ │ ├── indic_normalize_test.go
│ │ │ └── scripts.go
│ │ ├── it/
│ │ │ ├── analyzer_it.go
│ │ │ ├── analyzer_it_test.go
│ │ │ ├── articles_it.go
│ │ │ ├── elision_it.go
│ │ │ ├── elision_it_test.go
│ │ │ ├── light_stemmer_it.go
│ │ │ ├── light_stemmer_it_test.go
│ │ │ ├── stemmer_it_snowball.go
│ │ │ ├── stemmer_it_snowball_test.go
│ │ │ ├── stop_filter_it.go
│ │ │ └── stop_words_it.go
│ │ ├── nl/
│ │ │ ├── analyzer_nl.go
│ │ │ ├── analyzer_nl_test.go
│ │ │ ├── stemmer_nl.go
│ │ │ ├── stop_filter_nl.go
│ │ │ └── stop_words_nl.go
│ │ ├── no/
│ │ │ ├── analyzer_no.go
│ │ │ ├── analyzer_no_test.go
│ │ │ ├── stemmer_no.go
│ │ │ ├── stop_filter_no.go
│ │ │ └── stop_words_no.go
│ │ ├── pl/
│ │ │ ├── analyzer_pl.go
│ │ │ ├── analyzer_pl_test.go
│ │ │ ├── stemmer_pl.go
│ │ │ ├── stemmer_pl_test.go
│ │ │ ├── stempel/
│ │ │ │ ├── LICENSE
│ │ │ │ ├── cell.go
│ │ │ │ ├── diff.go
│ │ │ │ ├── diff_test.go
│ │ │ │ ├── file.go
│ │ │ │ ├── file_test.go
│ │ │ │ ├── fuzz.go
│ │ │ │ ├── javadata/
│ │ │ │ │ ├── README.md
│ │ │ │ │ ├── fuzz.go
│ │ │ │ │ ├── input.go
│ │ │ │ │ └── input_test.go
│ │ │ │ ├── multi_trie.go
│ │ │ │ ├── pl/
│ │ │ │ │ └── stemmer_20000.tbl
│ │ │ │ ├── row.go
│ │ │ │ ├── strenum.go
│ │ │ │ ├── strenum_test.go
│ │ │ │ └── trie.go
│ │ │ ├── stop_filter_pl.go
│ │ │ └── stop_words_pl.go
│ │ ├── pt/
│ │ │ ├── analyzer_pt.go
│ │ │ ├── analyzer_pt_test.go
│ │ │ ├── light_stemmer_pt.go
│ │ │ ├── light_stemmer_pt_test.go
│ │ │ ├── stop_filter_pt.go
│ │ │ └── stop_words_pt.go
│ │ ├── ro/
│ │ │ ├── analyzer_ro.go
│ │ │ ├── analyzer_ro_test.go
│ │ │ ├── stemmer_ro.go
│ │ │ ├── stop_filter_ro.go
│ │ │ └── stop_words_ro.go
│ │ ├── ru/
│ │ │ ├── analyzer_ru.go
│ │ │ ├── analyzer_ru_test.go
│ │ │ ├── stemmer_ru.go
│ │ │ ├── stemmer_ru_test.go
│ │ │ ├── stop_filter_ru.go
│ │ │ └── stop_words_ru.go
│ │ ├── sv/
│ │ │ ├── analyzer_sv.go
│ │ │ ├── analyzer_sv_test.go
│ │ │ ├── stemmer_sv.go
│ │ │ ├── stop_filter_sv.go
│ │ │ └── stop_words_sv.go
│ │ └── tr/
│ │ ├── analyzer_tr.go
│ │ ├── analyzer_tr_test.go
│ │ ├── stemmer_tr.go
│ │ ├── stemmer_tr_test.go
│ │ ├── stop_filter_tr.go
│ │ └── stop_words_tr.go
│ ├── test_words.txt
│ ├── token/
│ │ ├── apostrophe/
│ │ │ ├── apostrophe.go
│ │ │ └── apostrophe_test.go
│ │ ├── camelcase/
│ │ │ ├── camelcase.go
│ │ │ ├── camelcase_test.go
│ │ │ ├── parser.go
│ │ │ └── states.go
│ │ ├── compound/
│ │ │ ├── dict.go
│ │ │ └── dict_test.go
│ │ ├── edgengram/
│ │ │ ├── edgengram.go
│ │ │ └── edgengram_test.go
│ │ ├── elision/
│ │ │ ├── elision.go
│ │ │ └── elision_test.go
│ │ ├── hierarchy/
│ │ │ ├── hierarchy.go
│ │ │ └── hierarchy_test.go
│ │ ├── keyword/
│ │ │ ├── keyword.go
│ │ │ └── keyword_test.go
│ │ ├── length/
│ │ │ ├── length.go
│ │ │ └── length_test.go
│ │ ├── lowercase/
│ │ │ ├── lowercase.go
│ │ │ └── lowercase_test.go
│ │ ├── ngram/
│ │ │ ├── ngram.go
│ │ │ └── ngram_test.go
│ │ ├── porter/
│ │ │ ├── porter.go
│ │ │ └── porter_test.go
│ │ ├── reverse/
│ │ │ ├── reverse.go
│ │ │ └── reverse_test.go
│ │ ├── shingle/
│ │ │ ├── shingle.go
│ │ │ └── shingle_test.go
│ │ ├── snowball/
│ │ │ ├── snowball.go
│ │ │ └── snowball_test.go
│ │ ├── stop/
│ │ │ ├── stop.go
│ │ │ └── stop_test.go
│ │ ├── truncate/
│ │ │ ├── truncate.go
│ │ │ └── truncate_test.go
│ │ ├── unicodenorm/
│ │ │ ├── unicodenorm.go
│ │ │ └── unicodenorm_test.go
│ │ └── unique/
│ │ ├── unique.go
│ │ └── unique_test.go
│ ├── tokenizer/
│ │ ├── character/
│ │ │ ├── character.go
│ │ │ └── character_test.go
│ │ ├── exception/
│ │ │ ├── exception.go
│ │ │ └── exception_test.go
│ │ ├── letter/
│ │ │ └── letter.go
│ │ ├── regexp/
│ │ │ ├── regexp.go
│ │ │ └── regexp_test.go
│ │ ├── single/
│ │ │ ├── single.go
│ │ │ └── single_test.go
│ │ ├── unicode/
│ │ │ ├── unicode.go
│ │ │ └── unicode_test.go
│ │ ├── web/
│ │ │ ├── web.go
│ │ │ └── web_test.go
│ │ └── whitespace/
│ │ ├── whitespace.go
│ │ └── whitespace_test.go
│ ├── tokenmap/
│ │ └── custom.go
│ ├── tokenmap.go
│ ├── tokenmap_test.go
│ ├── type.go
│ ├── util.go
│ └── util_test.go
├── builder.go
├── builder_test.go
├── cmd/
│ └── bleve/
│ ├── cmd/
│ │ ├── bulk.go
│ │ ├── check.go
│ │ ├── count.go
│ │ ├── create.go
│ │ ├── dictionary.go
│ │ ├── dump.go
│ │ ├── dumpDoc.go
│ │ ├── dumpFields.go
│ │ ├── fields.go
│ │ ├── index.go
│ │ ├── mapping.go
│ │ ├── query.go
│ │ ├── registry.go
│ │ ├── root.go
│ │ ├── scorch/
│ │ │ ├── ascii.go
│ │ │ ├── deleted.go
│ │ │ ├── info.go
│ │ │ ├── internal.go
│ │ │ ├── root.go
│ │ │ └── snapshot.go
│ │ └── scorch.go
│ ├── gendocs.go
│ └── main.go
├── config/
│ ├── README.md
│ └── config.go
├── config.go
├── config_app.go
├── config_disk.go
├── data/
│ └── test/
│ └── sample-data.json
├── doc.go
├── docs/
│ ├── create_and_search_your_first_index.md
│ ├── geo.md
│ ├── hierarchy.md
│ ├── index_update.md
│ ├── pagination.md
│ ├── persister.md
│ ├── query-openapi-spec.yaml
│ ├── score_fusion.md
│ ├── scoring.md
│ ├── search_autocomplete.md
│ ├── sort_facet.md
│ ├── synonyms.md
│ └── vectors.md
├── document/
│ ├── document.go
│ ├── document_test.go
│ ├── field.go
│ ├── field_boolean.go
│ ├── field_composite.go
│ ├── field_datetime.go
│ ├── field_geopoint.go
│ ├── field_geopoint_test.go
│ ├── field_geoshape.go
│ ├── field_ip.go
│ ├── field_ip_test.go
│ ├── field_numeric.go
│ ├── field_numeric_test.go
│ ├── field_synonym.go
│ ├── field_text.go
│ ├── field_vector.go
│ ├── field_vector_base64.go
│ └── field_vector_base64_test.go
├── error.go
├── examples_test.go
├── fusion/
│ ├── fusion.go
│ ├── rrf.go
│ ├── rrf_test.go
│ ├── rsf.go
│ ├── rsf_test.go
│ └── util.go
├── geo/
│ ├── README.md
│ ├── benchmark_geohash_test.go
│ ├── geo.go
│ ├── geo_dist.go
│ ├── geo_dist_test.go
│ ├── geo_s2plugin_impl.go
│ ├── geo_test.go
│ ├── geohash.go
│ ├── geohash_test.go
│ ├── parse.go
│ ├── parse_test.go
│ ├── sloppy.go
│ └── versus_test.go
├── go.mod
├── index/
│ ├── scorch/
│ │ ├── README.md
│ │ ├── builder.go
│ │ ├── builder_test.go
│ │ ├── empty.go
│ │ ├── event.go
│ │ ├── event_test.go
│ │ ├── field_dict_test.go
│ │ ├── int.go
│ │ ├── int_test.go
│ │ ├── introducer.go
│ │ ├── merge.go
│ │ ├── merge_test.go
│ │ ├── mergeplan/
│ │ │ ├── merge_plan.go
│ │ │ ├── merge_plan_test.go
│ │ │ └── sort.go
│ │ ├── optimize.go
│ │ ├── optimize_knn.go
│ │ ├── persister.go
│ │ ├── reader_test.go
│ │ ├── regexp.go
│ │ ├── regexp_test.go
│ │ ├── rollback.go
│ │ ├── rollback_test.go
│ │ ├── scorch.go
│ │ ├── scorch_test.go
│ │ ├── segment_plugin.go
│ │ ├── snapshot_index.go
│ │ ├── snapshot_index_dict.go
│ │ ├── snapshot_index_doc.go
│ │ ├── snapshot_index_str.go
│ │ ├── snapshot_index_test.go
│ │ ├── snapshot_index_tfr.go
│ │ ├── snapshot_index_thes.go
│ │ ├── snapshot_index_vr.go
│ │ ├── snapshot_segment.go
│ │ ├── snapshot_vector_index.go
│ │ ├── stats.go
│ │ └── unadorned.go
│ └── upsidedown/
│ ├── analysis.go
│ ├── analysis_test.go
│ ├── benchmark_all.sh
│ ├── benchmark_boltdb_test.go
│ ├── benchmark_common_test.go
│ ├── benchmark_gtreap_test.go
│ ├── benchmark_null_test.go
│ ├── dump.go
│ ├── dump_test.go
│ ├── field_cache.go
│ ├── field_dict.go
│ ├── field_dict_test.go
│ ├── index_reader.go
│ ├── protoc-README.md
│ ├── reader.go
│ ├── reader_test.go
│ ├── row.go
│ ├── row_merge.go
│ ├── row_merge_test.go
│ ├── row_test.go
│ ├── stats.go
│ ├── store/
│ │ ├── boltdb/
│ │ │ ├── iterator.go
│ │ │ ├── reader.go
│ │ │ ├── stats.go
│ │ │ ├── store.go
│ │ │ ├── store_test.go
│ │ │ └── writer.go
│ │ ├── goleveldb/
│ │ │ ├── batch.go
│ │ │ ├── config.go
│ │ │ ├── iterator.go
│ │ │ ├── reader.go
│ │ │ ├── store.go
│ │ │ ├── store_test.go
│ │ │ └── writer.go
│ │ ├── gtreap/
│ │ │ ├── iterator.go
│ │ │ ├── reader.go
│ │ │ ├── store.go
│ │ │ ├── store_test.go
│ │ │ └── writer.go
│ │ ├── metrics/
│ │ │ ├── batch.go
│ │ │ ├── iterator.go
│ │ │ ├── metrics_test.go
│ │ │ ├── reader.go
│ │ │ ├── stats.go
│ │ │ ├── store.go
│ │ │ ├── store_test.go
│ │ │ ├── util.go
│ │ │ └── writer.go
│ │ ├── moss/
│ │ │ ├── batch.go
│ │ │ ├── iterator.go
│ │ │ ├── lower.go
│ │ │ ├── lower_test.go
│ │ │ ├── reader.go
│ │ │ ├── stats.go
│ │ │ ├── store.go
│ │ │ ├── store_test.go
│ │ │ └── writer.go
│ │ └── null/
│ │ ├── null.go
│ │ └── null_test.go
│ ├── upsidedown.go
│ ├── upsidedown.pb.go
│ ├── upsidedown.proto
│ └── upsidedown_test.go
├── index.go
├── index_alias.go
├── index_alias_impl.go
├── index_alias_impl_test.go
├── index_impl.go
├── index_meta.go
├── index_meta_test.go
├── index_stats.go
├── index_test.go
├── index_update.go
├── index_update_test.go
├── mapping/
│ ├── analysis.go
│ ├── document.go
│ ├── examples_test.go
│ ├── field.go
│ ├── index.go
│ ├── mapping.go
│ ├── mapping_no_vectors.go
│ ├── mapping_test.go
│ ├── mapping_vectors.go
│ ├── mapping_vectors_test.go
│ ├── reflect.go
│ ├── reflect_test.go
│ └── synonym.go
├── mapping.go
├── mapping_vector.go
├── numeric/
│ ├── bin.go
│ ├── bin_test.go
│ ├── float.go
│ ├── float_test.go
│ ├── prefix_coded.go
│ └── prefix_coded_test.go
├── pre_search.go
├── query.go
├── query_bench_test.go
├── registry/
│ ├── analyzer.go
│ ├── cache.go
│ ├── char_filter.go
│ ├── datetime_parser.go
│ ├── fragment_formatter.go
│ ├── fragmenter.go
│ ├── highlighter.go
│ ├── index_type.go
│ ├── nested.go
│ ├── registry.go
│ ├── store.go
│ ├── synonym_source.go
│ ├── token_filter.go
│ ├── token_maps.go
│ └── tokenizer.go
├── rescorer.go
├── rescorer_knn_test.go
├── rescorer_test.go
├── search/
│ ├── collector/
│ │ ├── bench_test.go
│ │ ├── eligible.go
│ │ ├── heap.go
│ │ ├── knn.go
│ │ ├── list.go
│ │ ├── nested.go
│ │ ├── search_test.go
│ │ ├── slice.go
│ │ ├── topn.go
│ │ └── topn_test.go
│ ├── collector.go
│ ├── explanation.go
│ ├── facet/
│ │ ├── benchmark_data.txt
│ │ ├── facet_builder_datetime.go
│ │ ├── facet_builder_numeric.go
│ │ ├── facet_builder_numeric_test.go
│ │ ├── facet_builder_terms.go
│ │ └── facet_builder_terms_test.go
│ ├── facets_builder.go
│ ├── facets_builder_test.go
│ ├── highlight/
│ │ ├── format/
│ │ │ ├── ansi/
│ │ │ │ └── ansi.go
│ │ │ ├── html/
│ │ │ │ ├── html.go
│ │ │ │ └── html_test.go
│ │ │ └── plain/
│ │ │ ├── plain.go
│ │ │ └── plain_test.go
│ │ ├── fragmenter/
│ │ │ └── simple/
│ │ │ ├── simple.go
│ │ │ └── simple_test.go
│ │ ├── highlighter/
│ │ │ ├── ansi/
│ │ │ │ └── ansi.go
│ │ │ ├── html/
│ │ │ │ └── html.go
│ │ │ └── simple/
│ │ │ ├── fragment_scorer_simple.go
│ │ │ ├── fragment_scorer_simple_test.go
│ │ │ ├── highlighter_simple.go
│ │ │ └── highlighter_simple_test.go
│ │ ├── highlighter.go
│ │ ├── term_locations.go
│ │ └── term_locations_test.go
│ ├── levenshtein.go
│ ├── levenshtein_test.go
│ ├── pool.go
│ ├── pool_test.go
│ ├── query/
│ │ ├── bool_field.go
│ │ ├── boolean.go
│ │ ├── boost.go
│ │ ├── conjunction.go
│ │ ├── date_range.go
│ │ ├── date_range_string.go
│ │ ├── date_range_test.go
│ │ ├── disjunction.go
│ │ ├── docid.go
│ │ ├── fuzzy.go
│ │ ├── geo_boundingbox.go
│ │ ├── geo_boundingpolygon.go
│ │ ├── geo_distance.go
│ │ ├── geo_shape.go
│ │ ├── ip_range.go
│ │ ├── knn.go
│ │ ├── match.go
│ │ ├── match_all.go
│ │ ├── match_none.go
│ │ ├── match_phrase.go
│ │ ├── match_phrase_test.go
│ │ ├── multi_phrase.go
│ │ ├── numeric_range.go
│ │ ├── phrase.go
│ │ ├── prefix.go
│ │ ├── query.go
│ │ ├── query_string.go
│ │ ├── query_string.y
│ │ ├── query_string.y.go
│ │ ├── query_string_lex.go
│ │ ├── query_string_lex_test.go
│ │ ├── query_string_parser.go
│ │ ├── query_string_parser_test.go
│ │ ├── query_test.go
│ │ ├── regexp.go
│ │ ├── term.go
│ │ ├── term_range.go
│ │ └── wildcard.go
│ ├── scorer/
│ │ ├── scorer_conjunction.go
│ │ ├── scorer_constant.go
│ │ ├── scorer_constant_test.go
│ │ ├── scorer_disjunction.go
│ │ ├── scorer_knn.go
│ │ ├── scorer_knn_test.go
│ │ ├── scorer_term.go
│ │ ├── scorer_term_test.go
│ │ └── sqrt_cache.go
│ ├── search.go
│ ├── search_test.go
│ ├── searcher/
│ │ ├── base_test.go
│ │ ├── geoshape_contains_test.go
│ │ ├── geoshape_intersects_test.go
│ │ ├── geoshape_within_test.go
│ │ ├── optimize_knn.go
│ │ ├── optimize_no_knn.go
│ │ ├── ordered_searchers_list.go
│ │ ├── search_boolean.go
│ │ ├── search_boolean_test.go
│ │ ├── search_conjunction.go
│ │ ├── search_conjunction_nested.go
│ │ ├── search_conjunction_test.go
│ │ ├── search_disjunction.go
│ │ ├── search_disjunction_heap.go
│ │ ├── search_disjunction_slice.go
│ │ ├── search_disjunction_test.go
│ │ ├── search_docid.go
│ │ ├── search_docid_test.go
│ │ ├── search_filter.go
│ │ ├── search_fuzzy.go
│ │ ├── search_fuzzy_test.go
│ │ ├── search_geoboundingbox.go
│ │ ├── search_geoboundingbox_test.go
│ │ ├── search_geopointdistance.go
│ │ ├── search_geopointdistance_test.go
│ │ ├── search_geopolygon.go
│ │ ├── search_geopolygon_test.go
│ │ ├── search_geoshape.go
│ │ ├── search_geoshape_circle_test.go
│ │ ├── search_geoshape_envelope_test.go
│ │ ├── search_geoshape_geometrycollection_test.go
│ │ ├── search_geoshape_linestring_test.go
│ │ ├── search_geoshape_points_test.go
│ │ ├── search_geoshape_polygon_test.go
│ │ ├── search_ip_range.go
│ │ ├── search_ip_range_test.go
│ │ ├── search_knn.go
│ │ ├── search_match_all.go
│ │ ├── search_match_all_test.go
│ │ ├── search_match_none.go
│ │ ├── search_match_none_test.go
│ │ ├── search_multi_term.go
│ │ ├── search_numeric_range.go
│ │ ├── search_numeric_range_test.go
│ │ ├── search_phrase.go
│ │ ├── search_phrase_test.go
│ │ ├── search_regexp.go
│ │ ├── search_regexp_test.go
│ │ ├── search_term.go
│ │ ├── search_term_prefix.go
│ │ ├── search_term_range.go
│ │ ├── search_term_range_test.go
│ │ └── search_term_test.go
│ ├── sort.go
│ ├── sort_test.go
│ ├── util.go
│ └── util_test.go
├── search.go
├── search_knn.go
├── search_knn_test.go
├── search_nested_test.go
├── search_no_knn.go
├── search_test.go
├── size/
│ └── sizes.go
├── test/
│ ├── integration.go
│ ├── integration_test.go
│ ├── ip_field_test.go
│ ├── tests/
│ │ ├── alias/
│ │ │ ├── datasets/
│ │ │ │ ├── shard0/
│ │ │ │ │ ├── a.json
│ │ │ │ │ └── c.json
│ │ │ │ └── shard1/
│ │ │ │ ├── b.json
│ │ │ │ └── d.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── basic/
│ │ │ ├── data/
│ │ │ │ ├── a.json
│ │ │ │ ├── b.json
│ │ │ │ ├── c.json
│ │ │ │ └── d.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── employee/
│ │ │ ├── data/
│ │ │ │ └── emp10508560.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── facet/
│ │ │ ├── data/
│ │ │ │ ├── a.json
│ │ │ │ ├── b.json
│ │ │ │ ├── c.json
│ │ │ │ ├── d.json
│ │ │ │ ├── e.json
│ │ │ │ ├── f.json
│ │ │ │ ├── g.json
│ │ │ │ ├── h.json
│ │ │ │ ├── i.json
│ │ │ │ └── j.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── fosdem/
│ │ │ ├── data/
│ │ │ │ ├── 3311@FOSDEM15@fosdem.org.json
│ │ │ │ ├── 3492@FOSDEM15@fosdem.org.json
│ │ │ │ ├── 3496@FOSDEM15@fosdem.org.json
│ │ │ │ ├── 3505@FOSDEM15@fosdem.org.json
│ │ │ │ └── 3507@FOSDEM15@fosdem.org.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── geo/
│ │ │ ├── data/
│ │ │ │ ├── amoeba_brewery.json
│ │ │ │ ├── brewpub_on_the_green.json
│ │ │ │ ├── capital_city_brewing_company.json
│ │ │ │ ├── communiti_brewery.json
│ │ │ │ ├── firehouse_grill_brewery.json
│ │ │ │ ├── hook_ladder_brewing_company.json
│ │ │ │ ├── jack_s_brewing.json
│ │ │ │ ├── social_brewery.json
│ │ │ │ └── sweet_water_tavern_and_brewery.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── geoshapes/
│ │ │ ├── data/
│ │ │ │ ├── circle_halairport.json
│ │ │ │ ├── envelope_brockwell_park.json
│ │ │ │ ├── geometrycollection_tvm.json
│ │ │ │ ├── linestring_putney_bridge.json
│ │ │ │ ├── multilinestring_old_airport_road.json
│ │ │ │ ├── multipoint_blr_stadiums.json
│ │ │ │ ├── multipolygon_london_parks.json
│ │ │ │ ├── point_museum_of_london.json
│ │ │ │ └── polygon_cubbonpark.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ ├── phrase/
│ │ │ ├── data/
│ │ │ │ ├── a.json
│ │ │ │ └── b.json
│ │ │ ├── mapping.json
│ │ │ └── searches.json
│ │ └── sort/
│ │ ├── data/
│ │ │ ├── a.json
│ │ │ ├── b.json
│ │ │ ├── c.json
│ │ │ ├── d.json
│ │ │ ├── e.json
│ │ │ └── f.json
│ │ ├── mapping.json
│ │ └── searches.json
│ ├── versus_score_test.go
│ └── versus_test.go
└── util/
├── extract.go
├── json.go
└── keys.go
================================================
FILE CONTENTS
================================================
================================================
FILE: .github/workflows/cover.yml
================================================
on:
push:
branches:
- master
pull_request:
name: Coverage
jobs:
coverage:
runs-on: ubuntu-latest
steps:
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: '1.25.x'
- name: Checkout code
uses: actions/checkout@v2
- name: Clean environment
run: |
go clean -cache -testcache -modcache
rm -f profile.cov
- name: Test
run: |
go test -coverprofile=profile.cov ./...
- name: Remove non-GO entries from coverage profile
run: |
grep -E 'mode|\.go' profile.cov > profile_go.cov
- name: Send coverage
uses: shogo82148/actions-goveralls@v1
with:
path-to-profile: profile_go.cov
================================================
FILE: .github/workflows/tests.yml
================================================
on:
push:
branches:
- master
pull_request:
name: Tests
jobs:
test:
strategy:
matrix:
go-version: [1.23.x, 1.24.x, 1.25.x]
platform: [ubuntu-latest, macos-latest, windows-latest]
runs-on: ${{ matrix.platform }}
steps:
- name: Install Go
uses: actions/setup-go@v5
with:
go-version: ${{ matrix.go-version }}
- name: Checkout code
uses: actions/checkout@v2
- name: Test
run: |
go version
go test -race ./...
================================================
FILE: .gitignore
================================================
#*
*.sublime-*
*~
.#*
.project
.settings
**/.idea/
**/*.iml
.DS_Store
query_string.y.go.tmp
/analysis/token_filters/cld2/cld2-read-only
/analysis/token_filters/cld2/libcld2_full.a
/cmd/bleve/bleve
vendor/**
!vendor/manifest
/y.output
/search/query/y.output
*.test
tags
go.sum
================================================
FILE: CONTRIBUTING.md
================================================
# Contributing to Bleve
We look forward to your contributions, but ask that you first review these guidelines.
## Sign the CLA
As Bleve is a Couchbase project, we require that contributors accept the [Couchbase Contributor License Agreement](http://review.couchbase.org/static/individual_agreement.html). To sign this agreement, log into the Couchbase [code review tool](http://review.couchbase.org/). The Bleve project does not use this code review tool, but it is still used to track acceptance of the contributor license agreements.
## Submitting a Pull Request
All types of contributions are welcome, but please keep the following in mind:
- If you're planning a large change, you should really discuss it in a GitHub issue or on the Google group first. This helps avoid duplicate effort and spending time on something that may not be merged.
- Existing tests should continue to pass; new tests for the contribution are nice to have.
- All code should be formatted with `go fmt`.
- All code should pass `go vet`.
================================================
FILE: LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: README.md
================================================
#  bleve
[Tests](https://github.com/blevesearch/bleve/actions/workflows/tests.yml?query=event%3Apush+branch%3Amaster)
[Coverage Status](https://coveralls.io/github/blevesearch/bleve)
[Go Reference](https://pkg.go.dev/github.com/blevesearch/bleve/v2)
[Join the chat](https://app.gitter.im/#/room/#blevesearch_bleve:gitter.im)
[Go Report Card](https://goreportcard.com/report/github.com/blevesearch/bleve/v2)
[Sourcegraph](https://sourcegraph.com/github.com/blevesearch/bleve?badge)
[License](https://opensource.org/licenses/Apache-2.0)
A modern indexing + search library in Go
## Features
* Index any Go data structure or JSON
* Intelligent defaults backed up by powerful configuration ([scorch](https://github.com/blevesearch/bleve/blob/master/index/scorch/README.md))
* Supported field types:
* `text`, `number`, `datetime`, `boolean`, `geopoint`, `geoshape`, `IP`, `vector`
* Supported query types:
* `term`, `phrase`, `match`, `match_phrase`, `prefix`, `regexp`, `wildcard`, `fuzzy`
* term range, numeric range, date range, boolean field
* compound queries: `conjuncts`, `disjuncts`, boolean (`must`/`should`/`must_not`)
* [query string syntax](http://www.blevesearch.com/docs/Query-String-Query/)
* [geo spatial search](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
* approximate k-nearest neighbors via [vector search](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md)
* [synonym search](https://github.com/blevesearch/bleve/blob/master/docs/synonyms.md)
* [hierarchical nested search](https://github.com/blevesearch/bleve/blob/master/docs/hierarchy.md)
* [tf-idf](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#tf-idf) / [bm25](https://github.com/blevesearch/bleve/blob/master/docs/scoring.md#bm25) scoring models
* Hybrid search: exact + semantic
* Supports [RRF (Reciprocal Rank Fusion) and RSF (Relative Score Fusion)](docs/score_fusion.md)
* [Result pagination](https://github.com/blevesearch/bleve/blob/master/docs/pagination.md)
* Query time boosting
* Search result match highlighting with document fragments
* Aggregations/faceting support:
* terms facet
* numeric range facet
* date range facet
## Indexing
```go
message := struct {
Id string
From string
Body string
}{
Id: "example",
From: "xyz@couchbase.com",
Body: "bleve indexing is easy",
}
mapping := bleve.NewIndexMapping()
index, err := bleve.New("example.bleve", mapping)
if err != nil {
panic(err)
}
index.Index(message.Id, message)
```
## Querying
```go
index, _ := bleve.Open("example.bleve")
query := bleve.NewQueryStringQuery("bleve")
searchRequest := bleve.NewSearchRequest(query)
searchResult, _ := index.Search(searchRequest)
```
## Command Line Interface
To install the CLI for the latest release of bleve, run:
```bash
go install github.com/blevesearch/bleve/v2/cmd/bleve@latest
```
```text
$ bleve --help
Bleve is a command-line tool to interact with a bleve index.
Usage:
bleve [command]
Available Commands:
bulk bulk loads from newline delimited JSON files
check checks the contents of the index
count counts the number documents in the index
create creates a new index
dictionary prints the term dictionary for the specified field in the index
dump dumps the contents of the index
fields lists the fields in this index
help Help about any command
index adds the files to the index
mapping prints the mapping used for this index
query queries the index
registry registry lists the bleve components compiled into this executable
scorch command-line tool to interact with a scorch index
Flags:
-h, --help help for bleve
Use "bleve [command] --help" for more information about a command.
```
## Text Analysis
Bleve includes general-purpose analyzers (customizable) as well as pre-built text analyzers for the following languages:
Arabic (ar), Bulgarian (bg), Catalan (ca), Chinese-Japanese-Korean (cjk), Kurdish (ckb), Danish (da), German (de), Greek (el), English (en), Spanish - Castilian (es), Basque (eu), Persian (fa), Finnish (fi), French (fr), Gaelic (ga), Spanish - Galician (gl), Hindi (hi), Croatian (hr), Hungarian (hu), Armenian (hy), Indonesian (id, in), Italian (it), Dutch (nl), Norwegian (no), Polish (pl), Portuguese (pt), Romanian (ro), Russian (ru), Swedish (sv), Turkish (tr)
## Text Analysis Wizard
[bleveanalysis.couchbase.com](https://bleveanalysis.couchbase.com)
## Discussion/Issues
Discuss usage/development of bleve and/or report issues here:
* [GitHub issues](https://github.com/blevesearch/bleve/issues)
* [Google group](https://groups.google.com/forum/#!forum/bleve)
## License
Apache License Version 2.0
================================================
FILE: SECURITY.md
================================================
# Security Policy
## Supported Versions
We support the latest release (for example, bleve v2.5.x).
## Reporting a Vulnerability
All security issues for this project should be reported via email to [security@couchbase.com](mailto:security@couchbase.com) and [fts-team@couchbase.com](mailto:fts-team@couchbase.com).
This mail will be delivered to the owners of this project.
- To ensure your report is NOT marked as spam, please include the word "security/vulnerability" along with the project name (blevesearch/bleve) in the subject of the email.
- Please be as descriptive as possible while explaining the issue; a test case highlighting the issue is always welcome.
Your email will be acknowledged as soon as possible.
================================================
FILE: analysis/analyzer/custom/custom.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package custom
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const Name = "custom"
// AnalyzerConstructor assembles an analyzer from a configuration map.
//
// Recognised keys:
//   - "char_filters"  (optional): []string or []interface{} of char filter names
//   - "tokenizer"     (required): string naming the tokenizer
//   - "token_filters" (optional): []string or []interface{} of token filter names
//
// Every name is resolved through cache. The keys are validated in the order
// listed above, and the first problem encountered is returned as the error.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	// Optional char filters: normalise the value to a list of names, then
	// resolve the names against the cache.
	var charFilters []analysis.CharFilter
	if raw, present := config["char_filters"]; present {
		var names []string
		switch v := raw.(type) {
		case []string:
			names = v
		case []interface{}:
			converted, err := convertInterfaceSliceToStringSlice(v, "char filter")
			if err != nil {
				return nil, err
			}
			names = converted
		default:
			return nil, fmt.Errorf("unsupported type for char_filters, must be slice")
		}
		resolved, err := getCharFilters(names, cache)
		if err != nil {
			return nil, err
		}
		charFilters = resolved
	}

	// The tokenizer is mandatory and must be given by name.
	rawTokenizer, present := config["tokenizer"]
	if !present {
		return nil, fmt.Errorf("must specify tokenizer")
	}
	tokenizerName, isString := rawTokenizer.(string)
	if !isString {
		return nil, fmt.Errorf("must specify tokenizer as string")
	}
	tokenizer, err := cache.TokenizerNamed(tokenizerName)
	if err != nil {
		return nil, err
	}

	// Optional token filters, handled the same way as the char filters.
	var tokenFilters []analysis.TokenFilter
	if raw, present := config["token_filters"]; present {
		var names []string
		switch v := raw.(type) {
		case []string:
			names = v
		case []interface{}:
			converted, err := convertInterfaceSliceToStringSlice(v, "token filter")
			if err != nil {
				return nil, err
			}
			names = converted
		default:
			return nil, fmt.Errorf("unsupported type for token_filters, must be slice")
		}
		resolved, err := getTokenFilters(names, cache)
		if err != nil {
			return nil, err
		}
		tokenFilters = resolved
	}

	// Filter slices that were never configured stay nil in the result.
	return &analysis.DefaultAnalyzer{
		CharFilters:  charFilters,
		Tokenizer:    tokenizer,
		TokenFilters: tokenFilters,
	}, nil
}
// init registers AnalyzerConstructor with the registry under Name and panics
// if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(Name, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
// getCharFilters looks up every name in charFilterNames through cache and
// returns the char filters in the same order. The first lookup that fails
// aborts the whole operation and its error is returned with a nil slice.
func getCharFilters(charFilterNames []string, cache *registry.Cache) ([]analysis.CharFilter, error) {
	resolved := make([]analysis.CharFilter, 0, len(charFilterNames))
	for _, name := range charFilterNames {
		filter, err := cache.CharFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
// getTokenFilters looks up every name in tokenFilterNames through cache and
// returns the token filters in the same order. The first lookup that fails
// aborts the whole operation and its error is returned with a nil slice.
func getTokenFilters(tokenFilterNames []string, cache *registry.Cache) ([]analysis.TokenFilter, error) {
	resolved := make([]analysis.TokenFilter, 0, len(tokenFilterNames))
	for _, name := range tokenFilterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		resolved = append(resolved, filter)
	}
	return resolved, nil
}
// convertInterfaceSliceToStringSlice turns a slice of arbitrary values into a
// slice of strings, keeping the element order. If any element is not a
// string, it returns a nil slice and an error whose message starts with
// objType (for example "char filter name must be a string").
func convertInterfaceSliceToStringSlice(interfaceSlice []interface{}, objType string) ([]string, error) {
	names := make([]string, 0, len(interfaceSlice))
	for _, item := range interfaceSlice {
		name, isString := item.(string)
		if !isString {
			return nil, fmt.Errorf("%s name must be a string", objType)
		}
		names = append(names, name)
	}
	return names, nil
}
================================================
FILE: analysis/analyzer/keyword/keyword.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package keyword
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
"github.com/blevesearch/bleve/v2/registry"
)
const Name = "keyword"
// AnalyzerConstructor returns an analyzer whose only component is the
// tokenizer registered under single.Name, with no char or token filters.
// The config argument is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	singleTokenizer, err := cache.TokenizerNamed(single.Name)
	if err != nil {
		return nil, err
	}
	return &analysis.DefaultAnalyzer{Tokenizer: singleTokenizer}, nil
}
// init registers AnalyzerConstructor with the registry under Name and panics
// if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(Name, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/analyzer/simple/simple.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
"github.com/blevesearch/bleve/v2/registry"
)
const Name = "simple"
// AnalyzerConstructor returns an analyzer that tokenizes with the tokenizer
// registered under letter.Name and then applies the token filter registered
// under lowercase.Name. The config argument is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	letterTokenizer, err := cache.TokenizerNamed(letter.Name)
	if err != nil {
		return nil, err
	}

	lower, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    letterTokenizer,
		TokenFilters: []analysis.TokenFilter{lower},
	}, nil
}
// init registers AnalyzerConstructor with the registry under Name and panics
// if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(Name, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/analyzer/standard/standard.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package standard
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
const Name = "standard"
// AnalyzerConstructor returns an analyzer that tokenizes with the tokenizer
// registered under unicode.Name and then applies, in order, the token filters
// registered under lowercase.Name and en.StopName. The config argument is not
// consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	lower, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	stopWords, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}

	// Filter order matters: lowercase first, then stop-word removal.
	return &analysis.DefaultAnalyzer{
		Tokenizer:    unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{lower, stopWords},
	}, nil
}
// init registers AnalyzerConstructor with the registry under Name and panics
// if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(Name, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/analyzer/web/web.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package web
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/web"
"github.com/blevesearch/bleve/v2/registry"
)
const Name = "web"
// AnalyzerConstructor returns an analyzer that tokenizes with the tokenizer
// registered under web.Name and then applies, in order, the token filters
// registered under lowercase.Name and en.StopName. The config argument is not
// consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	webTokenizer, err := cache.TokenizerNamed(web.Name)
	if err != nil {
		return nil, err
	}

	lower, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	stopWords, err := cache.TokenFilterNamed(en.StopName)
	if err != nil {
		return nil, err
	}

	// Filter order matters: lowercase first, then stop-word removal.
	return &analysis.DefaultAnalyzer{
		Tokenizer:    webTokenizer,
		TokenFilters: []analysis.TokenFilter{lower, stopWords},
	}, nil
}
// init registers AnalyzerConstructor with the registry under Name and panics
// if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(Name, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/benchmark_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis_test
import (
index "github.com/blevesearch/bleve_index_api"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/registry"
)
// BenchmarkAnalysis times one full pass per iteration: creating a fresh
// registry cache, fetching the standard analyzer from it, analyzing
// bleveWikiArticle and computing token frequencies with term vectors. Each
// iteration also checks that the expected number of frequency entries comes
// back.
func BenchmarkAnalysis(b *testing.B) {
	const wantFreqs = 511

	for n := 0; n < b.N; n++ {
		c := registry.NewCache()
		std, err := c.AnalyzerNamed(standard.Name)
		if err != nil {
			b.Fatal(err)
		}

		tokens := std.Analyze(bleveWikiArticle)
		if got := len(analysis.TokenFrequency(tokens, nil, index.IncludeTermVectors)); got != wantFreqs {
			b.Errorf("expected %d freqs, got %d", wantFreqs, got)
		}
	}
}
var bleveWikiArticle = []byte(`Boiling liquid expanding vapor explosion
From Wikipedia, the free encyclopedia
See also: Boiler explosion and Steam explosion
Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.
This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
Contents [hide]
1 Mechanism
1.1 Water example
1.2 BLEVEs without chemical reactions
2 Fires
3 Incidents
4 Safety measures
5 See also
6 References
7 External links
Mechanism[edit]
This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:
If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed).
The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example.
When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did.
Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, propane, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures.
If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.
Water example[edit]
Imagine, for example, a tank of pressurized liquid water held at 204.4 °C (400 °F). This tank would normally be pressurized to 1.7 MPa (250 psi) above atmospheric ("gauge") pressure. If the tank containing the water were to rupture, there would for a slight moment exist a volume of liquid water which would be
at atmospheric pressure, and
204.4 °C (400 °F).
At atmospheric pressure the boiling point of water is 100 °C (212 °F) - liquid water at atmospheric pressure cannot exist at temperatures higher than 100 °C (212 °F). At that moment, the water would boil and turn to vapour explosively, and the 204.4 °C (400 °F) liquid water turned to gas would take up a lot more volume than it did as liquid, causing a vapour explosion. Such explosions can happen when the superheated water of a steam engine escapes through a crack in a boiler, causing a boiler explosion.
BLEVEs without chemical reactions[edit]
It is important to note that a BLEVE need not be a chemical explosion—nor does there need to be a fire—however if a flammable substance is subject to a BLEVE it may also be subject to intense heating, either from an external source of heat which may have caused the vessel to rupture in the first place or from an internal source of localized heating such as skin friction. This heating can cause a flammable substance to ignite, adding a secondary explosion caused by the primary BLEVE. While blast effects of any BLEVE can be devastating, a flammable substance such as propane can add significantly to the danger.
Bleve explosion.svg
While the term BLEVE is most often used to describe the results of a container of flammable liquid rupturing due to fire, a BLEVE can occur even with a non-flammable substance such as water,[2] liquid nitrogen,[3] liquid helium or other refrigerants or cryogens, and therefore is not usually considered a type of chemical explosion.
Fires[edit]
BLEVEs can be caused by an external fire near the storage vessel causing heating of the contents and pressure build-up. While tanks are often designed to withstand great pressure, constant heating can cause the metal to weaken and eventually fail. If the tank is being heated in an area where there is no liquid, it may rupture faster without the liquid to absorb the heat. Gas containers are usually equipped with relief valves that vent off excess pressure, but the tank can still fail if the pressure is not released quickly enough.[1] Relief valves are sized to release pressure fast enough to prevent the pressure from increasing beyond the strength of the vessel, but not so fast as to be the cause of an explosion. An appropriately sized relief valve will allow the liquid inside to boil slowly, maintaining a constant pressure in the vessel until all the liquid has boiled and the vessel empties.
If the substance involved is flammable, it is likely that the resulting cloud of the substance will ignite after the BLEVE has occurred, forming a fireball and possibly a fuel-air explosion, also termed a vapor cloud explosion (VCE). If the materials are toxic, a large area will be contaminated.[4]
Incidents[edit]
The term "BLEVE" was coined by three researchers at Factory Mutual, in the analysis of an accident there in 1957 involving a chemical reactor vessel.[5]
In August 1959 the Kansas City Fire Department suffered its largest ever loss of life in the line of duty, when a 25,000 gallon (95,000 litre) gas tank exploded during a fire on Southwest Boulevard killing five firefighters. This was the first time BLEVE was used to describe a burning fuel tank.[citation needed]
Later incidents included the Cheapside Street Whisky Bond Fire in Glasgow, Scotland in 1960; Feyzin, France in 1966; Crescent City, Illinois in 1970; Kingman, Arizona in 1973; a liquid nitrogen tank rupture[6] at Air Products and Chemicals and Mobay Chemical Company at New Martinsville, West Virginia on January 31, 1978 [1];Texas City, Texas in 1978; Murdock, Illinois in 1983; San Juan Ixhuatepec, Mexico City in 1984; and Toronto, Ontario in 2008.
Safety measures[edit]
[icon] This section requires expansion. (July 2013)
Some fire mitigation measures are listed under liquefied petroleum gas.
See also[edit]
Boiler explosion
Expansion ratio
Explosive boiling or phase explosion
Rapid phase transition
Viareggio train derailment
2008 Toronto explosions
Gas carriers
Los Alfaques Disaster
Lac-Mégantic derailment
References[edit]
^ Jump up to: a b Kletz, Trevor (March 1990). Critical Aspects of Safety and Loss Prevention. London: Butterworth–Heinemann. pp. 43–45. ISBN 0-408-04429-2.
Jump up ^ "Temperature Pressure Relief Valves on Water Heaters: test, inspect, replace, repair guide". Inspect-ny.com. Retrieved 2011-07-12.
Jump up ^ Liquid nitrogen BLEVE demo
Jump up ^ "Chemical Process Safety" (PDF). Retrieved 2011-07-12.
Jump up ^ David F. Peterson, BLEVE: Facts, Risk Factors, and Fallacies, Fire Engineering magazine (2002).
Jump up ^ "STATE EX REL. VAPOR CORP. v. NARICK". Supreme Court of Appeals of West Virginia. 1984-07-12. Retrieved 2014-03-16.
External links[edit]
Look up boiling liquid expanding vapor explosion in Wiktionary, the free dictionary.
Wikimedia Commons has media related to BLEVE.
BLEVE Demo on YouTube — video of a controlled BLEVE demo
huge explosions on YouTube — video of propane and isobutane BLEVEs from a train derailment at Murdock, Illinois (3 September 1983)
Propane BLEVE on YouTube — video of BLEVE from the Toronto propane depot fire
Moscow Ring Road Accident on YouTube - Dozens of LPG tank BLEVEs after a road accident in Moscow
Kingman, AZ BLEVE — An account of the 5 July 1973 explosion in Kingman, with photographs
Propane Tank Explosions — Description of circumstances required to cause a propane tank BLEVE.
Analysis of BLEVE Events at DOE Sites - Details physics and mathematics of BLEVEs.
HID - SAFETY REPORT ASSESSMENT GUIDE: Whisky Maturation Warehouses - The liquor is aged in wooden barrels that can suffer BLEVE.
Categories: ExplosivesFirefightingFireTypes of fireGas technologiesIndustrial fires and explosions`)
================================================
FILE: analysis/char/asciifolding/asciifolding.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// converted to Go from Lucene's AsciiFoldingFilter
// https://lucene.apache.org/core/4_0_0/analyzers-common/org/apache/lucene/analysis/miscellaneous/ASCIIFoldingFilter.html
package asciifolding
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const Name = "asciifolding"
// AsciiFoldingFilter is a character filter with no fields; its Filter method
// runs the input through foldToASCII, which converts characters above ASCII
// to their ASCII equivalents.
type AsciiFoldingFilter struct{}
// New returns a pointer to a freshly created AsciiFoldingFilter.
func New() *AsciiFoldingFilter {
	return new(AsciiFoldingFilter)
}
// Filter decodes input into runes, folds them with foldToASCII and returns
// the result re-encoded as UTF-8 bytes. Empty input is returned unchanged.
func (s *AsciiFoldingFilter) Filter(input []byte) []byte {
	if len(input) == 0 {
		return input
	}

	runes := []rune(string(input))
	n := len(runes)

	// The output buffer starts at the input length; its capacity is sized
	// for the worst case of every rune folding to 4 runes.
	folded := foldToASCII(runes, 0, make([]rune, n, 4*n), 0, n)
	return []byte(string(folded))
}
// AsciiFoldingFilterConstructor is the registry constructor for this char
// filter. It ignores both config and cache and always succeeds, returning a
// filter created by New.
func AsciiFoldingFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	filter := New()
	return filter, nil
}
// init registers AsciiFoldingFilterConstructor with the registry under Name
// and panics if the registration reports an error.
func init() {
	if err := registry.RegisterCharFilter(Name, AsciiFoldingFilterConstructor); err != nil {
		panic(err)
	}
}
// Converts characters above ASCII to their ASCII equivalents.
// For example, accents are removed from accented characters.
func foldToASCII(input []rune, inputPos int, output []rune, outputPos int, length int) []rune {
end := inputPos + length
for pos := inputPos; pos < end; pos++ {
c := input[pos]
// Quick test: if it's not in range then just keep current character
if c < '\u0080' {
output[outputPos] = c
outputPos++
} else {
switch c {
case '\u00C0': // À [LATIN CAPITAL LETTER A WITH GRAVE]
fallthrough
case '\u00C1': // Á [LATIN CAPITAL LETTER A WITH ACUTE]
fallthrough
case '\u00C2': // Â [LATIN CAPITAL LETTER A WITH CIRCUMFLEX]
fallthrough
case '\u00C3': // Ã [LATIN CAPITAL LETTER A WITH TILDE]
fallthrough
case '\u00C4': // Ä [LATIN CAPITAL LETTER A WITH DIAERESIS]
fallthrough
case '\u00C5': // Å [LATIN CAPITAL LETTER A WITH RING ABOVE]
fallthrough
case '\u0100': // Ā [LATIN CAPITAL LETTER A WITH MACRON]
fallthrough
case '\u0102': // Ă [LATIN CAPITAL LETTER A WITH BREVE]
fallthrough
case '\u0104': // Ą [LATIN CAPITAL LETTER A WITH OGONEK]
fallthrough
case '\u018F': // Ə http://en.wikipedia.org/wiki/Schwa [LATIN CAPITAL LETTER SCHWA]
fallthrough
case '\u01CD': // Ǎ [LATIN CAPITAL LETTER A WITH CARON]
fallthrough
case '\u01DE': // Ǟ [LATIN CAPITAL LETTER A WITH DIAERESIS AND MACRON]
fallthrough
case '\u01E0': // Ǡ [LATIN CAPITAL LETTER A WITH DOT ABOVE AND MACRON]
fallthrough
case '\u01FA': // Ǻ [LATIN CAPITAL LETTER A WITH RING ABOVE AND ACUTE]
fallthrough
case '\u0200': // Ȁ [LATIN CAPITAL LETTER A WITH DOUBLE GRAVE]
fallthrough
case '\u0202': // Ȃ [LATIN CAPITAL LETTER A WITH INVERTED BREVE]
fallthrough
case '\u0226': // Ȧ [LATIN CAPITAL LETTER A WITH DOT ABOVE]
fallthrough
case '\u023A': // Ⱥ [LATIN CAPITAL LETTER A WITH STROKE]
fallthrough
case '\u1D00': // ᴀ [LATIN LETTER SMALL CAPITAL A]
fallthrough
case '\u1E00': // Ḁ [LATIN CAPITAL LETTER A WITH RING BELOW]
fallthrough
case '\u1EA0': // Ạ [LATIN CAPITAL LETTER A WITH DOT BELOW]
fallthrough
case '\u1EA2': // Ả [LATIN CAPITAL LETTER A WITH HOOK ABOVE]
fallthrough
case '\u1EA4': // Ấ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND ACUTE]
fallthrough
case '\u1EA6': // Ầ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND GRAVE]
fallthrough
case '\u1EA8': // Ẩ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
fallthrough
case '\u1EAA': // Ẫ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND TILDE]
fallthrough
case '\u1EAC': // Ậ [LATIN CAPITAL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
fallthrough
case '\u1EAE': // Ắ [LATIN CAPITAL LETTER A WITH BREVE AND ACUTE]
fallthrough
case '\u1EB0': // Ằ [LATIN CAPITAL LETTER A WITH BREVE AND GRAVE]
fallthrough
case '\u1EB2': // Ẳ [LATIN CAPITAL LETTER A WITH BREVE AND HOOK ABOVE]
fallthrough
case '\u1EB4': // Ẵ [LATIN CAPITAL LETTER A WITH BREVE AND TILDE]
fallthrough
case '\u24B6': // Ⓐ [CIRCLED LATIN CAPITAL LETTER A]
fallthrough
case '\uFF21': // A [FULLWIDTH LATIN CAPITAL LETTER A]
fallthrough
case '\u1EB6': // Ặ [LATIN CAPITAL LETTER A WITH BREVE AND DOT BELOW]
output[outputPos] = 'A'
outputPos++
case '\u00E0': // à [LATIN SMALL LETTER A WITH GRAVE]
fallthrough
case '\u00E1': // á [LATIN SMALL LETTER A WITH ACUTE]
fallthrough
case '\u00E2': // â [LATIN SMALL LETTER A WITH CIRCUMFLEX]
fallthrough
case '\u00E3': // ã [LATIN SMALL LETTER A WITH TILDE]
fallthrough
case '\u00E4': // ä [LATIN SMALL LETTER A WITH DIAERESIS]
fallthrough
case '\u00E5': // å [LATIN SMALL LETTER A WITH RING ABOVE]
fallthrough
case '\u0101': // ā [LATIN SMALL LETTER A WITH MACRON]
fallthrough
case '\u0103': // ă [LATIN SMALL LETTER A WITH BREVE]
fallthrough
case '\u0105': // ą [LATIN SMALL LETTER A WITH OGONEK]
fallthrough
case '\u01CE': // ǎ [LATIN SMALL LETTER A WITH CARON]
fallthrough
case '\u01DF': // ǟ [LATIN SMALL LETTER A WITH DIAERESIS AND MACRON]
fallthrough
case '\u01E1': // ǡ [LATIN SMALL LETTER A WITH DOT ABOVE AND MACRON]
fallthrough
case '\u01FB': // ǻ [LATIN SMALL LETTER A WITH RING ABOVE AND ACUTE]
fallthrough
case '\u0201': // ȁ [LATIN SMALL LETTER A WITH DOUBLE GRAVE]
fallthrough
case '\u0203': // ȃ [LATIN SMALL LETTER A WITH INVERTED BREVE]
fallthrough
case '\u0227': // ȧ [LATIN SMALL LETTER A WITH DOT ABOVE]
fallthrough
case '\u0250': // ɐ [LATIN SMALL LETTER TURNED A]
fallthrough
case '\u0259': // ə [LATIN SMALL LETTER SCHWA]
fallthrough
case '\u025A': // ɚ [LATIN SMALL LETTER SCHWA WITH HOOK]
fallthrough
case '\u1D8F': // ᶏ [LATIN SMALL LETTER A WITH RETROFLEX HOOK]
fallthrough
case '\u1D95': // ᶕ [LATIN SMALL LETTER SCHWA WITH RETROFLEX HOOK]
fallthrough
case '\u1E01': // ạ [LATIN SMALL LETTER A WITH RING BELOW]
fallthrough
case '\u1E9A': // ả [LATIN SMALL LETTER A WITH RIGHT HALF RING]
fallthrough
case '\u1EA1': // ạ [LATIN SMALL LETTER A WITH DOT BELOW]
fallthrough
case '\u1EA3': // ả [LATIN SMALL LETTER A WITH HOOK ABOVE]
fallthrough
case '\u1EA5': // ấ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND ACUTE]
fallthrough
case '\u1EA7': // ầ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND GRAVE]
fallthrough
case '\u1EA9': // ẩ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND HOOK ABOVE]
fallthrough
case '\u1EAB': // ẫ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND TILDE]
fallthrough
case '\u1EAD': // ậ [LATIN SMALL LETTER A WITH CIRCUMFLEX AND DOT BELOW]
fallthrough
case '\u1EAF': // ắ [LATIN SMALL LETTER A WITH BREVE AND ACUTE]
fallthrough
case '\u1EB1': // ằ [LATIN SMALL LETTER A WITH BREVE AND GRAVE]
fallthrough
case '\u1EB3': // ẳ [LATIN SMALL LETTER A WITH BREVE AND HOOK ABOVE]
fallthrough
case '\u1EB5': // ẵ [LATIN SMALL LETTER A WITH BREVE AND TILDE]
fallthrough
case '\u1EB7': // ặ [LATIN SMALL LETTER A WITH BREVE AND DOT BELOW]
fallthrough
case '\u2090': // ₐ [LATIN SUBSCRIPT SMALL LETTER A]
fallthrough
case '\u2094': // ₔ [LATIN SUBSCRIPT SMALL LETTER SCHWA]
fallthrough
case '\u24D0': // ⓐ [CIRCLED LATIN SMALL LETTER A]
fallthrough
case '\u2C65': // ⱥ [LATIN SMALL LETTER A WITH STROKE]
fallthrough
case '\u2C6F': // Ɐ [LATIN CAPITAL LETTER TURNED A]
fallthrough
case '\uFF41': // a [FULLWIDTH LATIN SMALL LETTER A]
output[outputPos] = 'a'
outputPos++
case '\uA732': // Ꜳ [LATIN CAPITAL LETTER AA]
output = output[:(len(output) + 1)]
output[outputPos] = 'A'
outputPos++
output[outputPos] = 'A'
outputPos++
case '\u00C6': // Æ [LATIN CAPITAL LETTER AE]
fallthrough
case '\u01E2': // Ǣ [LATIN CAPITAL LETTER AE WITH MACRON]
fallthrough
case '\u01FC': // Ǽ [LATIN CAPITAL LETTER AE WITH ACUTE]
fallthrough
case '\u1D01': // ᴁ [LATIN LETTER SMALL CAPITAL AE]
output = output[:(len(output) + 1)]
output[outputPos] = 'A'
outputPos++
output[outputPos] = 'E'
outputPos++
case '\uA734': // Ꜵ [LATIN CAPITAL LETTER AO]
output = output[:(len(output) + 1)]
output[outputPos] = 'A'
outputPos++
output[outputPos] = 'O'
outputPos++
case '\uA736': // Ꜷ [LATIN CAPITAL LETTER AU]
output = output[:(len(output) + 1)]
output[outputPos] = 'A'
outputPos++
output[outputPos] = 'U'
outputPos++
case '\uA738': // Ꜹ [LATIN CAPITAL LETTER AV]
fallthrough
case '\uA73A': // Ꜻ [LATIN CAPITAL LETTER AV WITH HORIZONTAL BAR]
output = output[:(len(output) + 1)]
output[outputPos] = 'A'
outputPos++
output[outputPos] = 'V'
outputPos++
case '\uA73C': // Ꜽ [LATIN CAPITAL LETTER AY]
output = output[:(len(output) + 1)]
output[outputPos] = 'A'
outputPos++
output[outputPos] = 'Y'
outputPos++
case '\u249C': // ⒜ [PARENTHESIZED LATIN SMALL LETTER A]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'a'
outputPos++
output[outputPos] = ')'
outputPos++
case '\uA733': // ꜳ [LATIN SMALL LETTER AA]
output = output[:(len(output) + 1)]
output[outputPos] = 'a'
outputPos++
output[outputPos] = 'a'
outputPos++
case '\u00E6': // æ [LATIN SMALL LETTER AE]
fallthrough
case '\u01E3': // ǣ [LATIN SMALL LETTER AE WITH MACRON]
fallthrough
case '\u01FD': // ǽ [LATIN SMALL LETTER AE WITH ACUTE]
fallthrough
case '\u1D02': // ᴂ [LATIN SMALL LETTER TURNED AE]
output = output[:(len(output) + 1)]
output[outputPos] = 'a'
outputPos++
output[outputPos] = 'e'
outputPos++
case '\uA735': // ꜵ [LATIN SMALL LETTER AO]
output = output[:(len(output) + 1)]
output[outputPos] = 'a'
outputPos++
output[outputPos] = 'o'
outputPos++
case '\uA737': // ꜷ [LATIN SMALL LETTER AU]
output = output[:(len(output) + 1)]
output[outputPos] = 'a'
outputPos++
output[outputPos] = 'u'
outputPos++
case '\uA739': // ꜹ [LATIN SMALL LETTER AV]
fallthrough
case '\uA73B': // ꜻ [LATIN SMALL LETTER AV WITH HORIZONTAL BAR]
output = output[:(len(output) + 1)]
output[outputPos] = 'a'
outputPos++
output[outputPos] = 'v'
outputPos++
case '\uA73D': // ꜽ [LATIN SMALL LETTER AY]
output = output[:(len(output) + 1)]
output[outputPos] = 'a'
outputPos++
output[outputPos] = 'y'
outputPos++
case '\u0181': // Ɓ [LATIN CAPITAL LETTER B WITH HOOK]
fallthrough
case '\u0182': // Ƃ [LATIN CAPITAL LETTER B WITH TOPBAR]
fallthrough
case '\u0243': // Ƀ [LATIN CAPITAL LETTER B WITH STROKE]
fallthrough
case '\u0299': // ʙ [LATIN LETTER SMALL CAPITAL B]
fallthrough
case '\u1D03': // ᴃ [LATIN LETTER SMALL CAPITAL BARRED B]
fallthrough
case '\u1E02': // Ḃ [LATIN CAPITAL LETTER B WITH DOT ABOVE]
fallthrough
case '\u1E04': // Ḅ [LATIN CAPITAL LETTER B WITH DOT BELOW]
fallthrough
case '\u1E06': // Ḇ [LATIN CAPITAL LETTER B WITH LINE BELOW]
fallthrough
case '\u24B7': // Ⓑ [CIRCLED LATIN CAPITAL LETTER B]
fallthrough
case '\uFF22': // B [FULLWIDTH LATIN CAPITAL LETTER B]
output[outputPos] = 'B'
outputPos++
case '\u0180': // ƀ [LATIN SMALL LETTER B WITH STROKE]
fallthrough
case '\u0183': // ƃ [LATIN SMALL LETTER B WITH TOPBAR]
fallthrough
case '\u0253': // ɓ [LATIN SMALL LETTER B WITH HOOK]
fallthrough
case '\u1D6C': // ᵬ [LATIN SMALL LETTER B WITH MIDDLE TILDE]
fallthrough
case '\u1D80': // ᶀ [LATIN SMALL LETTER B WITH PALATAL HOOK]
fallthrough
case '\u1E03': // ḃ [LATIN SMALL LETTER B WITH DOT ABOVE]
fallthrough
case '\u1E05': // ḅ [LATIN SMALL LETTER B WITH DOT BELOW]
fallthrough
case '\u1E07': // ḇ [LATIN SMALL LETTER B WITH LINE BELOW]
fallthrough
case '\u24D1': // ⓑ [CIRCLED LATIN SMALL LETTER B]
fallthrough
case '\uFF42': // b [FULLWIDTH LATIN SMALL LETTER B]
output[outputPos] = 'b'
outputPos++
case '\u249D': // ⒝ [PARENTHESIZED LATIN SMALL LETTER B]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'b'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00C7': // Ç [LATIN CAPITAL LETTER C WITH CEDILLA]
fallthrough
case '\u0106': // Ć [LATIN CAPITAL LETTER C WITH ACUTE]
fallthrough
case '\u0108': // Ĉ [LATIN CAPITAL LETTER C WITH CIRCUMFLEX]
fallthrough
case '\u010A': // Ċ [LATIN CAPITAL LETTER C WITH DOT ABOVE]
fallthrough
case '\u010C': // Č [LATIN CAPITAL LETTER C WITH CARON]
fallthrough
case '\u0187': // Ƈ [LATIN CAPITAL LETTER C WITH HOOK]
fallthrough
case '\u023B': // Ȼ [LATIN CAPITAL LETTER C WITH STROKE]
fallthrough
case '\u0297': // ʗ [LATIN LETTER STRETCHED C]
fallthrough
case '\u1D04': // ᴄ [LATIN LETTER SMALL CAPITAL C]
fallthrough
case '\u1E08': // Ḉ [LATIN CAPITAL LETTER C WITH CEDILLA AND ACUTE]
fallthrough
case '\u24B8': // Ⓒ [CIRCLED LATIN CAPITAL LETTER C]
fallthrough
case '\uFF23': // C [FULLWIDTH LATIN CAPITAL LETTER C]
output[outputPos] = 'C'
outputPos++
case '\u00E7': // ç [LATIN SMALL LETTER C WITH CEDILLA]
fallthrough
case '\u0107': // ć [LATIN SMALL LETTER C WITH ACUTE]
fallthrough
case '\u0109': // ĉ [LATIN SMALL LETTER C WITH CIRCUMFLEX]
fallthrough
case '\u010B': // ċ [LATIN SMALL LETTER C WITH DOT ABOVE]
fallthrough
case '\u010D': // č [LATIN SMALL LETTER C WITH CARON]
fallthrough
case '\u0188': // ƈ [LATIN SMALL LETTER C WITH HOOK]
fallthrough
case '\u023C': // ȼ [LATIN SMALL LETTER C WITH STROKE]
fallthrough
case '\u0255': // ɕ [LATIN SMALL LETTER C WITH CURL]
fallthrough
case '\u1E09': // ḉ [LATIN SMALL LETTER C WITH CEDILLA AND ACUTE]
fallthrough
case '\u2184': // ↄ [LATIN SMALL LETTER REVERSED C]
fallthrough
case '\u24D2': // ⓒ [CIRCLED LATIN SMALL LETTER C]
fallthrough
case '\uA73E': // Ꜿ [LATIN CAPITAL LETTER REVERSED C WITH DOT]
fallthrough
case '\uA73F': // ꜿ [LATIN SMALL LETTER REVERSED C WITH DOT]
fallthrough
case '\uFF43': // c [FULLWIDTH LATIN SMALL LETTER C]
output[outputPos] = 'c'
outputPos++
case '\u249E': // ⒞ [PARENTHESIZED LATIN SMALL LETTER C]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'c'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00D0': // Ð [LATIN CAPITAL LETTER ETH]
fallthrough
case '\u010E': // Ď [LATIN CAPITAL LETTER D WITH CARON]
fallthrough
case '\u0110': // Đ [LATIN CAPITAL LETTER D WITH STROKE]
fallthrough
case '\u0189': // Ɖ [LATIN CAPITAL LETTER AFRICAN D]
fallthrough
case '\u018A': // Ɗ [LATIN CAPITAL LETTER D WITH HOOK]
fallthrough
case '\u018B': // Ƌ [LATIN CAPITAL LETTER D WITH TOPBAR]
fallthrough
case '\u1D05': // ᴅ [LATIN LETTER SMALL CAPITAL D]
fallthrough
case '\u1D06': // ᴆ [LATIN LETTER SMALL CAPITAL ETH]
fallthrough
case '\u1E0A': // Ḋ [LATIN CAPITAL LETTER D WITH DOT ABOVE]
fallthrough
case '\u1E0C': // Ḍ [LATIN CAPITAL LETTER D WITH DOT BELOW]
fallthrough
case '\u1E0E': // Ḏ [LATIN CAPITAL LETTER D WITH LINE BELOW]
fallthrough
case '\u1E10': // Ḑ [LATIN CAPITAL LETTER D WITH CEDILLA]
fallthrough
case '\u1E12': // Ḓ [LATIN CAPITAL LETTER D WITH CIRCUMFLEX BELOW]
fallthrough
case '\u24B9': // Ⓓ [CIRCLED LATIN CAPITAL LETTER D]
fallthrough
case '\uA779': // Ꝺ [LATIN CAPITAL LETTER INSULAR D]
fallthrough
case '\uFF24': // D [FULLWIDTH LATIN CAPITAL LETTER D]
output[outputPos] = 'D'
outputPos++
case '\u00F0': // ð [LATIN SMALL LETTER ETH]
fallthrough
case '\u010F': // ď [LATIN SMALL LETTER D WITH CARON]
fallthrough
case '\u0111': // đ [LATIN SMALL LETTER D WITH STROKE]
fallthrough
case '\u018C': // ƌ [LATIN SMALL LETTER D WITH TOPBAR]
fallthrough
case '\u0221': // ȡ [LATIN SMALL LETTER D WITH CURL]
fallthrough
case '\u0256': // ɖ [LATIN SMALL LETTER D WITH TAIL]
fallthrough
case '\u0257': // ɗ [LATIN SMALL LETTER D WITH HOOK]
fallthrough
case '\u1D6D': // ᵭ [LATIN SMALL LETTER D WITH MIDDLE TILDE]
fallthrough
case '\u1D81': // ᶁ [LATIN SMALL LETTER D WITH PALATAL HOOK]
fallthrough
case '\u1D91': // ᶑ [LATIN SMALL LETTER D WITH HOOK AND TAIL]
fallthrough
case '\u1E0B': // ḋ [LATIN SMALL LETTER D WITH DOT ABOVE]
fallthrough
case '\u1E0D': // ḍ [LATIN SMALL LETTER D WITH DOT BELOW]
fallthrough
case '\u1E0F': // ḏ [LATIN SMALL LETTER D WITH LINE BELOW]
fallthrough
case '\u1E11': // ḑ [LATIN SMALL LETTER D WITH CEDILLA]
fallthrough
case '\u1E13': // ḓ [LATIN SMALL LETTER D WITH CIRCUMFLEX BELOW]
fallthrough
case '\u24D3': // ⓓ [CIRCLED LATIN SMALL LETTER D]
fallthrough
case '\uA77A': // ꝺ [LATIN SMALL LETTER INSULAR D]
fallthrough
case '\uFF44': // d [FULLWIDTH LATIN SMALL LETTER D]
output[outputPos] = 'd'
outputPos++
case '\u01C4': // DŽ [LATIN CAPITAL LETTER DZ WITH CARON]
fallthrough
case '\u01F1': // DZ [LATIN CAPITAL LETTER DZ]
output = output[:(len(output) + 1)]
output[outputPos] = 'D'
outputPos++
output[outputPos] = 'Z'
outputPos++
case '\u01C5': // Dž [LATIN CAPITAL LETTER D WITH SMALL LETTER Z WITH CARON]
fallthrough
case '\u01F2': // Dz [LATIN CAPITAL LETTER D WITH SMALL LETTER Z]
output = output[:(len(output) + 1)]
output[outputPos] = 'D'
outputPos++
output[outputPos] = 'z'
outputPos++
case '\u249F': // ⒟ [PARENTHESIZED LATIN SMALL LETTER D]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'd'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0238': // ȸ [LATIN SMALL LETTER DB DIGRAPH]
output = output[:(len(output) + 1)]
output[outputPos] = 'd'
outputPos++
output[outputPos] = 'b'
outputPos++
case '\u01C6': // dž [LATIN SMALL LETTER DZ WITH CARON]
fallthrough
case '\u01F3': // dz [LATIN SMALL LETTER DZ]
fallthrough
case '\u02A3': // ʣ [LATIN SMALL LETTER DZ DIGRAPH]
fallthrough
case '\u02A5': // ʥ [LATIN SMALL LETTER DZ DIGRAPH WITH CURL]
output = output[:(len(output) + 1)]
output[outputPos] = 'd'
outputPos++
output[outputPos] = 'z'
outputPos++
case '\u00C8': // È [LATIN CAPITAL LETTER E WITH GRAVE]
fallthrough
case '\u00C9': // É [LATIN CAPITAL LETTER E WITH ACUTE]
fallthrough
case '\u00CA': // Ê [LATIN CAPITAL LETTER E WITH CIRCUMFLEX]
fallthrough
case '\u00CB': // Ë [LATIN CAPITAL LETTER E WITH DIAERESIS]
fallthrough
case '\u0112': // Ē [LATIN CAPITAL LETTER E WITH MACRON]
fallthrough
case '\u0114': // Ĕ [LATIN CAPITAL LETTER E WITH BREVE]
fallthrough
case '\u0116': // Ė [LATIN CAPITAL LETTER E WITH DOT ABOVE]
fallthrough
case '\u0118': // Ę [LATIN CAPITAL LETTER E WITH OGONEK]
fallthrough
case '\u011A': // Ě [LATIN CAPITAL LETTER E WITH CARON]
fallthrough
case '\u018E': // Ǝ [LATIN CAPITAL LETTER REVERSED E]
fallthrough
case '\u0190': // Ɛ [LATIN CAPITAL LETTER OPEN E]
fallthrough
case '\u0204': // Ȅ [LATIN CAPITAL LETTER E WITH DOUBLE GRAVE]
fallthrough
case '\u0206': // Ȇ [LATIN CAPITAL LETTER E WITH INVERTED BREVE]
fallthrough
case '\u0228': // Ȩ [LATIN CAPITAL LETTER E WITH CEDILLA]
fallthrough
case '\u0246': // Ɇ [LATIN CAPITAL LETTER E WITH STROKE]
fallthrough
case '\u1D07': // ᴇ [LATIN LETTER SMALL CAPITAL E]
fallthrough
case '\u1E14': // Ḕ [LATIN CAPITAL LETTER E WITH MACRON AND GRAVE]
fallthrough
case '\u1E16': // Ḗ [LATIN CAPITAL LETTER E WITH MACRON AND ACUTE]
fallthrough
case '\u1E18': // Ḙ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX BELOW]
fallthrough
case '\u1E1A': // Ḛ [LATIN CAPITAL LETTER E WITH TILDE BELOW]
fallthrough
case '\u1E1C': // Ḝ [LATIN CAPITAL LETTER E WITH CEDILLA AND BREVE]
fallthrough
case '\u1EB8': // Ẹ [LATIN CAPITAL LETTER E WITH DOT BELOW]
fallthrough
case '\u1EBA': // Ẻ [LATIN CAPITAL LETTER E WITH HOOK ABOVE]
fallthrough
case '\u1EBC': // Ẽ [LATIN CAPITAL LETTER E WITH TILDE]
fallthrough
case '\u1EBE': // Ế [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND ACUTE]
fallthrough
case '\u1EC0': // Ề [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND GRAVE]
fallthrough
case '\u1EC2': // Ể [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
fallthrough
case '\u1EC4': // Ễ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND TILDE]
fallthrough
case '\u1EC6': // Ệ [LATIN CAPITAL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
fallthrough
case '\u24BA': // Ⓔ [CIRCLED LATIN CAPITAL LETTER E]
fallthrough
case '\u2C7B': // ⱻ [LATIN LETTER SMALL CAPITAL TURNED E]
fallthrough
case '\uFF25': // E [FULLWIDTH LATIN CAPITAL LETTER E]
output[outputPos] = 'E'
outputPos++
case '\u00E8': // è [LATIN SMALL LETTER E WITH GRAVE]
fallthrough
case '\u00E9': // é [LATIN SMALL LETTER E WITH ACUTE]
fallthrough
case '\u00EA': // ê [LATIN SMALL LETTER E WITH CIRCUMFLEX]
fallthrough
case '\u00EB': // ë [LATIN SMALL LETTER E WITH DIAERESIS]
fallthrough
case '\u0113': // ē [LATIN SMALL LETTER E WITH MACRON]
fallthrough
case '\u0115': // ĕ [LATIN SMALL LETTER E WITH BREVE]
fallthrough
case '\u0117': // ė [LATIN SMALL LETTER E WITH DOT ABOVE]
fallthrough
case '\u0119': // ę [LATIN SMALL LETTER E WITH OGONEK]
fallthrough
case '\u011B': // ě [LATIN SMALL LETTER E WITH CARON]
fallthrough
case '\u01DD': // ǝ [LATIN SMALL LETTER TURNED E]
fallthrough
case '\u0205': // ȅ [LATIN SMALL LETTER E WITH DOUBLE GRAVE]
fallthrough
case '\u0207': // ȇ [LATIN SMALL LETTER E WITH INVERTED BREVE]
fallthrough
case '\u0229': // ȩ [LATIN SMALL LETTER E WITH CEDILLA]
fallthrough
case '\u0247': // ɇ [LATIN SMALL LETTER E WITH STROKE]
fallthrough
case '\u0258': // ɘ [LATIN SMALL LETTER REVERSED E]
fallthrough
case '\u025B': // ɛ [LATIN SMALL LETTER OPEN E]
fallthrough
case '\u025C': // ɜ [LATIN SMALL LETTER REVERSED OPEN E]
fallthrough
case '\u025D': // ɝ [LATIN SMALL LETTER REVERSED OPEN E WITH HOOK]
fallthrough
case '\u025E': // ɞ [LATIN SMALL LETTER CLOSED REVERSED OPEN E]
fallthrough
case '\u029A': // ʚ [LATIN SMALL LETTER CLOSED OPEN E]
fallthrough
case '\u1D08': // ᴈ [LATIN SMALL LETTER TURNED OPEN E]
fallthrough
case '\u1D92': // ᶒ [LATIN SMALL LETTER E WITH RETROFLEX HOOK]
fallthrough
case '\u1D93': // ᶓ [LATIN SMALL LETTER OPEN E WITH RETROFLEX HOOK]
fallthrough
case '\u1D94': // ᶔ [LATIN SMALL LETTER REVERSED OPEN E WITH RETROFLEX HOOK]
fallthrough
case '\u1E15': // ḕ [LATIN SMALL LETTER E WITH MACRON AND GRAVE]
fallthrough
case '\u1E17': // ḗ [LATIN SMALL LETTER E WITH MACRON AND ACUTE]
fallthrough
case '\u1E19': // ḙ [LATIN SMALL LETTER E WITH CIRCUMFLEX BELOW]
fallthrough
case '\u1E1B': // ḛ [LATIN SMALL LETTER E WITH TILDE BELOW]
fallthrough
case '\u1E1D': // ḝ [LATIN SMALL LETTER E WITH CEDILLA AND BREVE]
fallthrough
case '\u1EB9': // ẹ [LATIN SMALL LETTER E WITH DOT BELOW]
fallthrough
case '\u1EBB': // ẻ [LATIN SMALL LETTER E WITH HOOK ABOVE]
fallthrough
case '\u1EBD': // ẽ [LATIN SMALL LETTER E WITH TILDE]
fallthrough
case '\u1EBF': // ế [LATIN SMALL LETTER E WITH CIRCUMFLEX AND ACUTE]
fallthrough
case '\u1EC1': // ề [LATIN SMALL LETTER E WITH CIRCUMFLEX AND GRAVE]
fallthrough
case '\u1EC3': // ể [LATIN SMALL LETTER E WITH CIRCUMFLEX AND HOOK ABOVE]
fallthrough
case '\u1EC5': // ễ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND TILDE]
fallthrough
case '\u1EC7': // ệ [LATIN SMALL LETTER E WITH CIRCUMFLEX AND DOT BELOW]
fallthrough
case '\u2091': // ₑ [LATIN SUBSCRIPT SMALL LETTER E]
fallthrough
case '\u24D4': // ⓔ [CIRCLED LATIN SMALL LETTER E]
fallthrough
case '\u2C78': // ⱸ [LATIN SMALL LETTER E WITH NOTCH]
fallthrough
case '\uFF45': // e [FULLWIDTH LATIN SMALL LETTER E]
output[outputPos] = 'e'
outputPos++
case '\u24A0': // ⒠ [PARENTHESIZED LATIN SMALL LETTER E]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'e'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0191': // Ƒ [LATIN CAPITAL LETTER F WITH HOOK]
fallthrough
case '\u1E1E': // Ḟ [LATIN CAPITAL LETTER F WITH DOT ABOVE]
fallthrough
case '\u24BB': // Ⓕ [CIRCLED LATIN CAPITAL LETTER F]
fallthrough
case '\uA730': // ꜰ [LATIN LETTER SMALL CAPITAL F]
fallthrough
case '\uA77B': // Ꝼ [LATIN CAPITAL LETTER INSULAR F]
fallthrough
case '\uA7FB': // ꟻ [LATIN EPIGRAPHIC LETTER REVERSED F]
fallthrough
case '\uFF26': // F [FULLWIDTH LATIN CAPITAL LETTER F]
output[outputPos] = 'F'
outputPos++
case '\u0192': // ƒ [LATIN SMALL LETTER F WITH HOOK]
fallthrough
case '\u1D6E': // ᵮ [LATIN SMALL LETTER F WITH MIDDLE TILDE]
fallthrough
case '\u1D82': // ᶂ [LATIN SMALL LETTER F WITH PALATAL HOOK]
fallthrough
case '\u1E1F': // ḟ [LATIN SMALL LETTER F WITH DOT ABOVE]
fallthrough
case '\u1E9B': // ẛ [LATIN SMALL LETTER LONG S WITH DOT ABOVE]
fallthrough
case '\u24D5': // ⓕ [CIRCLED LATIN SMALL LETTER F]
fallthrough
case '\uA77C': // ꝼ [LATIN SMALL LETTER INSULAR F]
fallthrough
case '\uFF46': // f [FULLWIDTH LATIN SMALL LETTER F]
output[outputPos] = 'f'
outputPos++
case '\u24A1': // ⒡ [PARENTHESIZED LATIN SMALL LETTER F]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'f'
outputPos++
output[outputPos] = ')'
outputPos++
case '\uFB00': // ff [LATIN SMALL LIGATURE FF]
output = output[:(len(output) + 1)]
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'f'
outputPos++
case '\uFB03': // ffi [LATIN SMALL LIGATURE FFI]
output = output[:(len(output) + 2)]
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'i'
outputPos++
case '\uFB04': // ffl [LATIN SMALL LIGATURE FFL]
output = output[:(len(output) + 2)]
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'l'
outputPos++
case '\uFB01': // fi [LATIN SMALL LIGATURE FI]
output = output[:(len(output) + 1)]
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'i'
outputPos++
case '\uFB02': // fl [LATIN SMALL LIGATURE FL]
output = output[:(len(output) + 1)]
output[outputPos] = 'f'
outputPos++
output[outputPos] = 'l'
outputPos++
case '\u011C': // Ĝ [LATIN CAPITAL LETTER G WITH CIRCUMFLEX]
fallthrough
case '\u011E': // Ğ [LATIN CAPITAL LETTER G WITH BREVE]
fallthrough
case '\u0120': // Ġ [LATIN CAPITAL LETTER G WITH DOT ABOVE]
fallthrough
case '\u0122': // Ģ [LATIN CAPITAL LETTER G WITH CEDILLA]
fallthrough
case '\u0193': // Ɠ [LATIN CAPITAL LETTER G WITH HOOK]
fallthrough
case '\u01E4': // Ǥ [LATIN CAPITAL LETTER G WITH STROKE]
fallthrough
case '\u01E5': // ǥ [LATIN SMALL LETTER G WITH STROKE]
fallthrough
case '\u01E6': // Ǧ [LATIN CAPITAL LETTER G WITH CARON]
fallthrough
case '\u01E7': // ǧ [LATIN SMALL LETTER G WITH CARON]
fallthrough
case '\u01F4': // Ǵ [LATIN CAPITAL LETTER G WITH ACUTE]
fallthrough
case '\u0262': // ɢ [LATIN LETTER SMALL CAPITAL G]
fallthrough
case '\u029B': // ʛ [LATIN LETTER SMALL CAPITAL G WITH HOOK]
fallthrough
case '\u1E20': // Ḡ [LATIN CAPITAL LETTER G WITH MACRON]
fallthrough
case '\u24BC': // Ⓖ [CIRCLED LATIN CAPITAL LETTER G]
fallthrough
case '\uA77D': // Ᵹ [LATIN CAPITAL LETTER INSULAR G]
fallthrough
case '\uA77E': // Ꝿ [LATIN CAPITAL LETTER TURNED INSULAR G]
fallthrough
case '\uFF27': // G [FULLWIDTH LATIN CAPITAL LETTER G]
output[outputPos] = 'G'
outputPos++
case '\u011D': // ĝ [LATIN SMALL LETTER G WITH CIRCUMFLEX]
fallthrough
case '\u011F': // ğ [LATIN SMALL LETTER G WITH BREVE]
fallthrough
case '\u0121': // ġ [LATIN SMALL LETTER G WITH DOT ABOVE]
fallthrough
case '\u0123': // ģ [LATIN SMALL LETTER G WITH CEDILLA]
fallthrough
case '\u01F5': // ǵ [LATIN SMALL LETTER G WITH ACUTE]
fallthrough
case '\u0260': // ɠ [LATIN SMALL LETTER G WITH HOOK]
fallthrough
case '\u0261': // ɡ [LATIN SMALL LETTER SCRIPT G]
fallthrough
case '\u1D77': // ᵷ [LATIN SMALL LETTER TURNED G]
fallthrough
case '\u1D79': // ᵹ [LATIN SMALL LETTER INSULAR G]
fallthrough
case '\u1D83': // ᶃ [LATIN SMALL LETTER G WITH PALATAL HOOK]
fallthrough
case '\u1E21': // ḡ [LATIN SMALL LETTER G WITH MACRON]
fallthrough
case '\u24D6': // ⓖ [CIRCLED LATIN SMALL LETTER G]
fallthrough
case '\uA77F': // ꝿ [LATIN SMALL LETTER TURNED INSULAR G]
fallthrough
case '\uFF47': // g [FULLWIDTH LATIN SMALL LETTER G]
output[outputPos] = 'g'
outputPos++
case '\u24A2': // ⒢ [PARENTHESIZED LATIN SMALL LETTER G]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'g'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0124': // Ĥ [LATIN CAPITAL LETTER H WITH CIRCUMFLEX]
fallthrough
case '\u0126': // Ħ [LATIN CAPITAL LETTER H WITH STROKE]
fallthrough
case '\u021E': // Ȟ [LATIN CAPITAL LETTER H WITH CARON]
fallthrough
case '\u029C': // ʜ [LATIN LETTER SMALL CAPITAL H]
fallthrough
case '\u1E22': // Ḣ [LATIN CAPITAL LETTER H WITH DOT ABOVE]
fallthrough
case '\u1E24': // Ḥ [LATIN CAPITAL LETTER H WITH DOT BELOW]
fallthrough
case '\u1E26': // Ḧ [LATIN CAPITAL LETTER H WITH DIAERESIS]
fallthrough
case '\u1E28': // Ḩ [LATIN CAPITAL LETTER H WITH CEDILLA]
fallthrough
case '\u1E2A': // Ḫ [LATIN CAPITAL LETTER H WITH BREVE BELOW]
fallthrough
case '\u24BD': // Ⓗ [CIRCLED LATIN CAPITAL LETTER H]
fallthrough
case '\u2C67': // Ⱨ [LATIN CAPITAL LETTER H WITH DESCENDER]
fallthrough
case '\u2C75': // Ⱶ [LATIN CAPITAL LETTER HALF H]
fallthrough
case '\uFF28': // H [FULLWIDTH LATIN CAPITAL LETTER H]
output[outputPos] = 'H'
outputPos++
case '\u0125': // ĥ [LATIN SMALL LETTER H WITH CIRCUMFLEX]
fallthrough
case '\u0127': // ħ [LATIN SMALL LETTER H WITH STROKE]
fallthrough
case '\u021F': // ȟ [LATIN SMALL LETTER H WITH CARON]
fallthrough
case '\u0265': // ɥ [LATIN SMALL LETTER TURNED H]
fallthrough
case '\u0266': // ɦ [LATIN SMALL LETTER H WITH HOOK]
fallthrough
case '\u02AE': // ʮ [LATIN SMALL LETTER TURNED H WITH FISHHOOK]
fallthrough
case '\u02AF': // ʯ [LATIN SMALL LETTER TURNED H WITH FISHHOOK AND TAIL]
fallthrough
case '\u1E23': // ḣ [LATIN SMALL LETTER H WITH DOT ABOVE]
fallthrough
case '\u1E25': // ḥ [LATIN SMALL LETTER H WITH DOT BELOW]
fallthrough
case '\u1E27': // ḧ [LATIN SMALL LETTER H WITH DIAERESIS]
fallthrough
case '\u1E29': // ḩ [LATIN SMALL LETTER H WITH CEDILLA]
fallthrough
case '\u1E2B': // ḫ [LATIN SMALL LETTER H WITH BREVE BELOW]
fallthrough
case '\u1E96': // ẖ [LATIN SMALL LETTER H WITH LINE BELOW]
fallthrough
case '\u24D7': // ⓗ [CIRCLED LATIN SMALL LETTER H]
fallthrough
case '\u2C68': // ⱨ [LATIN SMALL LETTER H WITH DESCENDER]
fallthrough
case '\u2C76': // ⱶ [LATIN SMALL LETTER HALF H]
fallthrough
case '\uFF48': // h [FULLWIDTH LATIN SMALL LETTER H]
output[outputPos] = 'h'
outputPos++
case '\u01F6': // Ƕ http://en.wikipedia.org/wiki/Hwair [LATIN CAPITAL LETTER HWAIR]
output = output[:(len(output) + 1)]
output[outputPos] = 'H'
outputPos++
output[outputPos] = 'V'
outputPos++
case '\u24A3': // ⒣ [PARENTHESIZED LATIN SMALL LETTER H]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'h'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0195': // ƕ [LATIN SMALL LETTER HV]
output = output[:(len(output) + 1)]
output[outputPos] = 'h'
outputPos++
output[outputPos] = 'v'
outputPos++
case '\u00CC': // Ì [LATIN CAPITAL LETTER I WITH GRAVE]
fallthrough
case '\u00CD': // Í [LATIN CAPITAL LETTER I WITH ACUTE]
fallthrough
case '\u00CE': // Î [LATIN CAPITAL LETTER I WITH CIRCUMFLEX]
fallthrough
case '\u00CF': // Ï [LATIN CAPITAL LETTER I WITH DIAERESIS]
fallthrough
case '\u0128': // Ĩ [LATIN CAPITAL LETTER I WITH TILDE]
fallthrough
case '\u012A': // Ī [LATIN CAPITAL LETTER I WITH MACRON]
fallthrough
case '\u012C': // Ĭ [LATIN CAPITAL LETTER I WITH BREVE]
fallthrough
case '\u012E': // Į [LATIN CAPITAL LETTER I WITH OGONEK]
fallthrough
case '\u0130': // İ [LATIN CAPITAL LETTER I WITH DOT ABOVE]
fallthrough
case '\u0196': // Ɩ [LATIN CAPITAL LETTER IOTA]
fallthrough
case '\u0197': // Ɨ [LATIN CAPITAL LETTER I WITH STROKE]
fallthrough
case '\u01CF': // Ǐ [LATIN CAPITAL LETTER I WITH CARON]
fallthrough
case '\u0208': // Ȉ [LATIN CAPITAL LETTER I WITH DOUBLE GRAVE]
fallthrough
case '\u020A': // Ȋ [LATIN CAPITAL LETTER I WITH INVERTED BREVE]
fallthrough
case '\u026A': // ɪ [LATIN LETTER SMALL CAPITAL I]
fallthrough
case '\u1D7B': // ᵻ [LATIN SMALL CAPITAL LETTER I WITH STROKE]
fallthrough
case '\u1E2C': // Ḭ [LATIN CAPITAL LETTER I WITH TILDE BELOW]
fallthrough
case '\u1E2E': // Ḯ [LATIN CAPITAL LETTER I WITH DIAERESIS AND ACUTE]
fallthrough
case '\u1EC8': // Ỉ [LATIN CAPITAL LETTER I WITH HOOK ABOVE]
fallthrough
case '\u1ECA': // Ị [LATIN CAPITAL LETTER I WITH DOT BELOW]
fallthrough
case '\u24BE': // Ⓘ [CIRCLED LATIN CAPITAL LETTER I]
fallthrough
case '\uA7FE': // ꟾ [LATIN EPIGRAPHIC LETTER I LONGA]
fallthrough
case '\uFF29': // I [FULLWIDTH LATIN CAPITAL LETTER I]
output[outputPos] = 'I'
outputPos++
case '\u00EC': // ì [LATIN SMALL LETTER I WITH GRAVE]
fallthrough
case '\u00ED': // í [LATIN SMALL LETTER I WITH ACUTE]
fallthrough
case '\u00EE': // î [LATIN SMALL LETTER I WITH CIRCUMFLEX]
fallthrough
case '\u00EF': // ï [LATIN SMALL LETTER I WITH DIAERESIS]
fallthrough
case '\u0129': // ĩ [LATIN SMALL LETTER I WITH TILDE]
fallthrough
case '\u012B': // ī [LATIN SMALL LETTER I WITH MACRON]
fallthrough
case '\u012D': // ĭ [LATIN SMALL LETTER I WITH BREVE]
fallthrough
case '\u012F': // į [LATIN SMALL LETTER I WITH OGONEK]
fallthrough
case '\u0131': // ı [LATIN SMALL LETTER DOTLESS I]
fallthrough
case '\u01D0': // ǐ [LATIN SMALL LETTER I WITH CARON]
fallthrough
case '\u0209': // ȉ [LATIN SMALL LETTER I WITH DOUBLE GRAVE]
fallthrough
case '\u020B': // ȋ [LATIN SMALL LETTER I WITH INVERTED BREVE]
fallthrough
case '\u0268': // ɨ [LATIN SMALL LETTER I WITH STROKE]
fallthrough
case '\u1D09': // ᴉ [LATIN SMALL LETTER TURNED I]
fallthrough
case '\u1D62': // ᵢ [LATIN SUBSCRIPT SMALL LETTER I]
fallthrough
case '\u1D7C': // ᵼ [LATIN SMALL LETTER IOTA WITH STROKE]
fallthrough
case '\u1D96': // ᶖ [LATIN SMALL LETTER I WITH RETROFLEX HOOK]
fallthrough
case '\u1E2D': // ḭ [LATIN SMALL LETTER I WITH TILDE BELOW]
fallthrough
case '\u1E2F': // ḯ [LATIN SMALL LETTER I WITH DIAERESIS AND ACUTE]
fallthrough
case '\u1EC9': // ỉ [LATIN SMALL LETTER I WITH HOOK ABOVE]
fallthrough
case '\u1ECB': // ị [LATIN SMALL LETTER I WITH DOT BELOW]
fallthrough
case '\u2071': // ⁱ [SUPERSCRIPT LATIN SMALL LETTER I]
fallthrough
case '\u24D8': // ⓘ [CIRCLED LATIN SMALL LETTER I]
fallthrough
case '\uFF49': // i [FULLWIDTH LATIN SMALL LETTER I]
output[outputPos] = 'i'
outputPos++
case '\u0132': // IJ [LATIN CAPITAL LIGATURE IJ]
output = output[:(len(output) + 1)]
output[outputPos] = 'I'
outputPos++
output[outputPos] = 'J'
outputPos++
case '\u24A4': // ⒤ [PARENTHESIZED LATIN SMALL LETTER I]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'i'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0133': // ij [LATIN SMALL LIGATURE IJ]
output = output[:(len(output) + 1)]
output[outputPos] = 'i'
outputPos++
output[outputPos] = 'j'
outputPos++
case '\u0134': // Ĵ [LATIN CAPITAL LETTER J WITH CIRCUMFLEX]
fallthrough
case '\u0248': // Ɉ [LATIN CAPITAL LETTER J WITH STROKE]
fallthrough
case '\u1D0A': // ᴊ [LATIN LETTER SMALL CAPITAL J]
fallthrough
case '\u24BF': // Ⓙ [CIRCLED LATIN CAPITAL LETTER J]
fallthrough
case '\uFF2A': // J [FULLWIDTH LATIN CAPITAL LETTER J]
output[outputPos] = 'J'
outputPos++
case '\u0135': // ĵ [LATIN SMALL LETTER J WITH CIRCUMFLEX]
fallthrough
case '\u01F0': // ǰ [LATIN SMALL LETTER J WITH CARON]
fallthrough
case '\u0237': // ȷ [LATIN SMALL LETTER DOTLESS J]
fallthrough
case '\u0249': // ɉ [LATIN SMALL LETTER J WITH STROKE]
fallthrough
case '\u025F': // ɟ [LATIN SMALL LETTER DOTLESS J WITH STROKE]
fallthrough
case '\u0284': // ʄ [LATIN SMALL LETTER DOTLESS J WITH STROKE AND HOOK]
fallthrough
case '\u029D': // ʝ [LATIN SMALL LETTER J WITH CROSSED-TAIL]
fallthrough
case '\u24D9': // ⓙ [CIRCLED LATIN SMALL LETTER J]
fallthrough
case '\u2C7C': // ⱼ [LATIN SUBSCRIPT SMALL LETTER J]
fallthrough
case '\uFF4A': // j [FULLWIDTH LATIN SMALL LETTER J]
output[outputPos] = 'j'
outputPos++
case '\u24A5': // ⒥ [PARENTHESIZED LATIN SMALL LETTER J]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'j'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0136': // Ķ [LATIN CAPITAL LETTER K WITH CEDILLA]
fallthrough
case '\u0198': // Ƙ [LATIN CAPITAL LETTER K WITH HOOK]
fallthrough
case '\u01E8': // Ǩ [LATIN CAPITAL LETTER K WITH CARON]
fallthrough
case '\u1D0B': // ᴋ [LATIN LETTER SMALL CAPITAL K]
fallthrough
case '\u1E30': // Ḱ [LATIN CAPITAL LETTER K WITH ACUTE]
fallthrough
case '\u1E32': // Ḳ [LATIN CAPITAL LETTER K WITH DOT BELOW]
fallthrough
case '\u1E34': // Ḵ [LATIN CAPITAL LETTER K WITH LINE BELOW]
fallthrough
case '\u24C0': // Ⓚ [CIRCLED LATIN CAPITAL LETTER K]
fallthrough
case '\u2C69': // Ⱪ [LATIN CAPITAL LETTER K WITH DESCENDER]
fallthrough
case '\uA740': // Ꝁ [LATIN CAPITAL LETTER K WITH STROKE]
fallthrough
case '\uA742': // Ꝃ [LATIN CAPITAL LETTER K WITH DIAGONAL STROKE]
fallthrough
case '\uA744': // Ꝅ [LATIN CAPITAL LETTER K WITH STROKE AND DIAGONAL STROKE]
fallthrough
case '\uFF2B': // K [FULLWIDTH LATIN CAPITAL LETTER K]
output[outputPos] = 'K'
outputPos++
case '\u0137': // ķ [LATIN SMALL LETTER K WITH CEDILLA]
fallthrough
case '\u0199': // ƙ [LATIN SMALL LETTER K WITH HOOK]
fallthrough
case '\u01E9': // ǩ [LATIN SMALL LETTER K WITH CARON]
fallthrough
case '\u029E': // ʞ [LATIN SMALL LETTER TURNED K]
fallthrough
case '\u1D84': // ᶄ [LATIN SMALL LETTER K WITH PALATAL HOOK]
fallthrough
case '\u1E31': // ḱ [LATIN SMALL LETTER K WITH ACUTE]
fallthrough
case '\u1E33': // ḳ [LATIN SMALL LETTER K WITH DOT BELOW]
fallthrough
case '\u1E35': // ḵ [LATIN SMALL LETTER K WITH LINE BELOW]
fallthrough
case '\u24DA': // ⓚ [CIRCLED LATIN SMALL LETTER K]
fallthrough
case '\u2C6A': // ⱪ [LATIN SMALL LETTER K WITH DESCENDER]
fallthrough
case '\uA741': // ꝁ [LATIN SMALL LETTER K WITH STROKE]
fallthrough
case '\uA743': // ꝃ [LATIN SMALL LETTER K WITH DIAGONAL STROKE]
fallthrough
case '\uA745': // ꝅ [LATIN SMALL LETTER K WITH STROKE AND DIAGONAL STROKE]
fallthrough
case '\uFF4B': // k [FULLWIDTH LATIN SMALL LETTER K]
output[outputPos] = 'k'
outputPos++
case '\u24A6': // ⒦ [PARENTHESIZED LATIN SMALL LETTER K]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'k'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0139': // Ĺ [LATIN CAPITAL LETTER L WITH ACUTE]
fallthrough
case '\u013B': // Ļ [LATIN CAPITAL LETTER L WITH CEDILLA]
fallthrough
case '\u013D': // Ľ [LATIN CAPITAL LETTER L WITH CARON]
fallthrough
case '\u013F': // Ŀ [LATIN CAPITAL LETTER L WITH MIDDLE DOT]
fallthrough
case '\u0141': // Ł [LATIN CAPITAL LETTER L WITH STROKE]
fallthrough
case '\u023D': // Ƚ [LATIN CAPITAL LETTER L WITH BAR]
fallthrough
case '\u029F': // ʟ [LATIN LETTER SMALL CAPITAL L]
fallthrough
case '\u1D0C': // ᴌ [LATIN LETTER SMALL CAPITAL L WITH STROKE]
fallthrough
case '\u1E36': // Ḷ [LATIN CAPITAL LETTER L WITH DOT BELOW]
fallthrough
case '\u1E38': // Ḹ [LATIN CAPITAL LETTER L WITH DOT BELOW AND MACRON]
fallthrough
case '\u1E3A': // Ḻ [LATIN CAPITAL LETTER L WITH LINE BELOW]
fallthrough
case '\u1E3C': // Ḽ [LATIN CAPITAL LETTER L WITH CIRCUMFLEX BELOW]
fallthrough
case '\u24C1': // Ⓛ [CIRCLED LATIN CAPITAL LETTER L]
fallthrough
case '\u2C60': // Ⱡ [LATIN CAPITAL LETTER L WITH DOUBLE BAR]
fallthrough
case '\u2C62': // Ɫ [LATIN CAPITAL LETTER L WITH MIDDLE TILDE]
fallthrough
case '\uA746': // Ꝇ [LATIN CAPITAL LETTER BROKEN L]
fallthrough
case '\uA748': // Ꝉ [LATIN CAPITAL LETTER L WITH HIGH STROKE]
fallthrough
case '\uA780': // Ꞁ [LATIN CAPITAL LETTER TURNED L]
fallthrough
case '\uFF2C': // L [FULLWIDTH LATIN CAPITAL LETTER L]
output[outputPos] = 'L'
outputPos++
case '\u013A': // ĺ [LATIN SMALL LETTER L WITH ACUTE]
fallthrough
case '\u013C': // ļ [LATIN SMALL LETTER L WITH CEDILLA]
fallthrough
case '\u013E': // ľ [LATIN SMALL LETTER L WITH CARON]
fallthrough
case '\u0140': // ŀ [LATIN SMALL LETTER L WITH MIDDLE DOT]
fallthrough
case '\u0142': // ł [LATIN SMALL LETTER L WITH STROKE]
fallthrough
case '\u019A': // ƚ [LATIN SMALL LETTER L WITH BAR]
fallthrough
case '\u0234': // ȴ [LATIN SMALL LETTER L WITH CURL]
fallthrough
case '\u026B': // ɫ [LATIN SMALL LETTER L WITH MIDDLE TILDE]
fallthrough
case '\u026C': // ɬ [LATIN SMALL LETTER L WITH BELT]
fallthrough
case '\u026D': // ɭ [LATIN SMALL LETTER L WITH RETROFLEX HOOK]
fallthrough
case '\u1D85': // ᶅ [LATIN SMALL LETTER L WITH PALATAL HOOK]
fallthrough
case '\u1E37': // ḷ [LATIN SMALL LETTER L WITH DOT BELOW]
fallthrough
case '\u1E39': // ḹ [LATIN SMALL LETTER L WITH DOT BELOW AND MACRON]
fallthrough
case '\u1E3B': // ḻ [LATIN SMALL LETTER L WITH LINE BELOW]
fallthrough
case '\u1E3D': // ḽ [LATIN SMALL LETTER L WITH CIRCUMFLEX BELOW]
fallthrough
case '\u24DB': // ⓛ [CIRCLED LATIN SMALL LETTER L]
fallthrough
case '\u2C61': // ⱡ [LATIN SMALL LETTER L WITH DOUBLE BAR]
fallthrough
case '\uA747': // ꝇ [LATIN SMALL LETTER BROKEN L]
fallthrough
case '\uA749': // ꝉ [LATIN SMALL LETTER L WITH HIGH STROKE]
fallthrough
case '\uA781': // ꞁ [LATIN SMALL LETTER TURNED L]
fallthrough
case '\uFF4C': // l [FULLWIDTH LATIN SMALL LETTER L]
output[outputPos] = 'l'
outputPos++
case '\u01C7': // LJ [LATIN CAPITAL LETTER LJ]
output = output[:(len(output) + 1)]
output[outputPos] = 'L'
outputPos++
output[outputPos] = 'J'
outputPos++
case '\u1EFA': // Ỻ [LATIN CAPITAL LETTER MIDDLE-WELSH LL]
output = output[:(len(output) + 1)]
output[outputPos] = 'L'
outputPos++
output[outputPos] = 'L'
outputPos++
case '\u01C8': // Lj [LATIN CAPITAL LETTER L WITH SMALL LETTER J]
output = output[:(len(output) + 1)]
output[outputPos] = 'L'
outputPos++
output[outputPos] = 'j'
outputPos++
case '\u24A7': // ⒧ [PARENTHESIZED LATIN SMALL LETTER L]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'l'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u01C9': // lj [LATIN SMALL LETTER LJ]
output = output[:(len(output) + 1)]
output[outputPos] = 'l'
outputPos++
output[outputPos] = 'j'
outputPos++
case '\u1EFB': // ỻ [LATIN SMALL LETTER MIDDLE-WELSH LL]
output = output[:(len(output) + 1)]
output[outputPos] = 'l'
outputPos++
output[outputPos] = 'l'
outputPos++
case '\u02AA': // ʪ [LATIN SMALL LETTER LS DIGRAPH]
output = output[:(len(output) + 1)]
output[outputPos] = 'l'
outputPos++
output[outputPos] = 's'
outputPos++
case '\u02AB': // ʫ [LATIN SMALL LETTER LZ DIGRAPH]
output = output[:(len(output) + 1)]
output[outputPos] = 'l'
outputPos++
output[outputPos] = 'z'
outputPos++
case '\u019C': // Ɯ [LATIN CAPITAL LETTER TURNED M]
fallthrough
case '\u1D0D': // ᴍ [LATIN LETTER SMALL CAPITAL M]
fallthrough
case '\u1E3E': // Ḿ [LATIN CAPITAL LETTER M WITH ACUTE]
fallthrough
case '\u1E40': // Ṁ [LATIN CAPITAL LETTER M WITH DOT ABOVE]
fallthrough
case '\u1E42': // Ṃ [LATIN CAPITAL LETTER M WITH DOT BELOW]
fallthrough
case '\u24C2': // Ⓜ [CIRCLED LATIN CAPITAL LETTER M]
fallthrough
case '\u2C6E': // Ɱ [LATIN CAPITAL LETTER M WITH HOOK]
fallthrough
case '\uA7FD': // ꟽ [LATIN EPIGRAPHIC LETTER INVERTED M]
fallthrough
case '\uA7FF': // ꟿ [LATIN EPIGRAPHIC LETTER ARCHAIC M]
fallthrough
case '\uFF2D': // M [FULLWIDTH LATIN CAPITAL LETTER M]
output[outputPos] = 'M'
outputPos++
case '\u026F': // ɯ [LATIN SMALL LETTER TURNED M]
fallthrough
case '\u0270': // ɰ [LATIN SMALL LETTER TURNED M WITH LONG LEG]
fallthrough
case '\u0271': // ɱ [LATIN SMALL LETTER M WITH HOOK]
fallthrough
case '\u1D6F': // ᵯ [LATIN SMALL LETTER M WITH MIDDLE TILDE]
fallthrough
case '\u1D86': // ᶆ [LATIN SMALL LETTER M WITH PALATAL HOOK]
fallthrough
case '\u1E3F': // ḿ [LATIN SMALL LETTER M WITH ACUTE]
fallthrough
case '\u1E41': // ṁ [LATIN SMALL LETTER M WITH DOT ABOVE]
fallthrough
case '\u1E43': // ṃ [LATIN SMALL LETTER M WITH DOT BELOW]
fallthrough
case '\u24DC': // ⓜ [CIRCLED LATIN SMALL LETTER M]
fallthrough
case '\uFF4D': // m [FULLWIDTH LATIN SMALL LETTER M]
output[outputPos] = 'm'
outputPos++
case '\u24A8': // ⒨ [PARENTHESIZED LATIN SMALL LETTER M]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'm'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00D1': // Ñ [LATIN CAPITAL LETTER N WITH TILDE]
fallthrough
case '\u0143': // Ń [LATIN CAPITAL LETTER N WITH ACUTE]
fallthrough
case '\u0145': // Ņ [LATIN CAPITAL LETTER N WITH CEDILLA]
fallthrough
case '\u0147': // Ň [LATIN CAPITAL LETTER N WITH CARON]
fallthrough
case '\u014A': // Ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN CAPITAL LETTER ENG]
fallthrough
case '\u019D': // Ɲ [LATIN CAPITAL LETTER N WITH LEFT HOOK]
fallthrough
case '\u01F8': // Ǹ [LATIN CAPITAL LETTER N WITH GRAVE]
fallthrough
case '\u0220': // Ƞ [LATIN CAPITAL LETTER N WITH LONG RIGHT LEG]
fallthrough
case '\u0274': // ɴ [LATIN LETTER SMALL CAPITAL N]
fallthrough
case '\u1D0E': // ᴎ [LATIN LETTER SMALL CAPITAL REVERSED N]
fallthrough
case '\u1E44': // Ṅ [LATIN CAPITAL LETTER N WITH DOT ABOVE]
fallthrough
case '\u1E46': // Ṇ [LATIN CAPITAL LETTER N WITH DOT BELOW]
fallthrough
case '\u1E48': // Ṉ [LATIN CAPITAL LETTER N WITH LINE BELOW]
fallthrough
case '\u1E4A': // Ṋ [LATIN CAPITAL LETTER N WITH CIRCUMFLEX BELOW]
fallthrough
case '\u24C3': // Ⓝ [CIRCLED LATIN CAPITAL LETTER N]
fallthrough
case '\uFF2E': // N [FULLWIDTH LATIN CAPITAL LETTER N]
output[outputPos] = 'N'
outputPos++
case '\u00F1': // ñ [LATIN SMALL LETTER N WITH TILDE]
fallthrough
case '\u0144': // ń [LATIN SMALL LETTER N WITH ACUTE]
fallthrough
case '\u0146': // ņ [LATIN SMALL LETTER N WITH CEDILLA]
fallthrough
case '\u0148': // ň [LATIN SMALL LETTER N WITH CARON]
fallthrough
case '\u0149': // ʼn [LATIN SMALL LETTER N PRECEDED BY APOSTROPHE]
fallthrough
case '\u014B': // ŋ http://en.wikipedia.org/wiki/Eng_(letter) [LATIN SMALL LETTER ENG]
fallthrough
case '\u019E': // ƞ [LATIN SMALL LETTER N WITH LONG RIGHT LEG]
fallthrough
case '\u01F9': // ǹ [LATIN SMALL LETTER N WITH GRAVE]
fallthrough
case '\u0235': // ȵ [LATIN SMALL LETTER N WITH CURL]
fallthrough
case '\u0272': // ɲ [LATIN SMALL LETTER N WITH LEFT HOOK]
fallthrough
case '\u0273': // ɳ [LATIN SMALL LETTER N WITH RETROFLEX HOOK]
fallthrough
case '\u1D70': // ᵰ [LATIN SMALL LETTER N WITH MIDDLE TILDE]
fallthrough
case '\u1D87': // ᶇ [LATIN SMALL LETTER N WITH PALATAL HOOK]
fallthrough
case '\u1E45': // ṅ [LATIN SMALL LETTER N WITH DOT ABOVE]
fallthrough
case '\u1E47': // ṇ [LATIN SMALL LETTER N WITH DOT BELOW]
fallthrough
case '\u1E49': // ṉ [LATIN SMALL LETTER N WITH LINE BELOW]
fallthrough
case '\u1E4B': // ṋ [LATIN SMALL LETTER N WITH CIRCUMFLEX BELOW]
fallthrough
case '\u207F': // ⁿ [SUPERSCRIPT LATIN SMALL LETTER N]
fallthrough
case '\u24DD': // ⓝ [CIRCLED LATIN SMALL LETTER N]
fallthrough
case '\uFF4E': // n [FULLWIDTH LATIN SMALL LETTER N]
output[outputPos] = 'n'
outputPos++
case '\u01CA': // NJ [LATIN CAPITAL LETTER NJ]
output = output[:(len(output) + 1)]
output[outputPos] = 'N'
outputPos++
output[outputPos] = 'J'
outputPos++
case '\u01CB': // Nj [LATIN CAPITAL LETTER N WITH SMALL LETTER J]
output = output[:(len(output) + 1)]
output[outputPos] = 'N'
outputPos++
output[outputPos] = 'j'
outputPos++
case '\u24A9': // ⒩ [PARENTHESIZED LATIN SMALL LETTER N]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'n'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u01CC': // nj [LATIN SMALL LETTER NJ]
output = output[:(len(output) + 1)]
output[outputPos] = 'n'
outputPos++
output[outputPos] = 'j'
outputPos++
case '\u00D2': // Ò [LATIN CAPITAL LETTER O WITH GRAVE]
fallthrough
case '\u00D3': // Ó [LATIN CAPITAL LETTER O WITH ACUTE]
fallthrough
case '\u00D4': // Ô [LATIN CAPITAL LETTER O WITH CIRCUMFLEX]
fallthrough
case '\u00D5': // Õ [LATIN CAPITAL LETTER O WITH TILDE]
fallthrough
case '\u00D6': // Ö [LATIN CAPITAL LETTER O WITH DIAERESIS]
fallthrough
case '\u00D8': // Ø [LATIN CAPITAL LETTER O WITH STROKE]
fallthrough
case '\u014C': // Ō [LATIN CAPITAL LETTER O WITH MACRON]
fallthrough
case '\u014E': // Ŏ [LATIN CAPITAL LETTER O WITH BREVE]
fallthrough
case '\u0150': // Ő [LATIN CAPITAL LETTER O WITH DOUBLE ACUTE]
fallthrough
case '\u0186': // Ɔ [LATIN CAPITAL LETTER OPEN O]
fallthrough
case '\u019F': // Ɵ [LATIN CAPITAL LETTER O WITH MIDDLE TILDE]
fallthrough
case '\u01A0': // Ơ [LATIN CAPITAL LETTER O WITH HORN]
fallthrough
case '\u01D1': // Ǒ [LATIN CAPITAL LETTER O WITH CARON]
fallthrough
case '\u01EA': // Ǫ [LATIN CAPITAL LETTER O WITH OGONEK]
fallthrough
case '\u01EC': // Ǭ [LATIN CAPITAL LETTER O WITH OGONEK AND MACRON]
fallthrough
case '\u01FE': // Ǿ [LATIN CAPITAL LETTER O WITH STROKE AND ACUTE]
fallthrough
case '\u020C': // Ȍ [LATIN CAPITAL LETTER O WITH DOUBLE GRAVE]
fallthrough
case '\u020E': // Ȏ [LATIN CAPITAL LETTER O WITH INVERTED BREVE]
fallthrough
case '\u022A': // Ȫ [LATIN CAPITAL LETTER O WITH DIAERESIS AND MACRON]
fallthrough
case '\u022C': // Ȭ [LATIN CAPITAL LETTER O WITH TILDE AND MACRON]
fallthrough
case '\u022E': // Ȯ [LATIN CAPITAL LETTER O WITH DOT ABOVE]
fallthrough
case '\u0230': // Ȱ [LATIN CAPITAL LETTER O WITH DOT ABOVE AND MACRON]
fallthrough
case '\u1D0F': // ᴏ [LATIN LETTER SMALL CAPITAL O]
fallthrough
case '\u1D10': // ᴐ [LATIN LETTER SMALL CAPITAL OPEN O]
fallthrough
case '\u1E4C': // Ṍ [LATIN CAPITAL LETTER O WITH TILDE AND ACUTE]
fallthrough
case '\u1E4E': // Ṏ [LATIN CAPITAL LETTER O WITH TILDE AND DIAERESIS]
fallthrough
case '\u1E50': // Ṑ [LATIN CAPITAL LETTER O WITH MACRON AND GRAVE]
fallthrough
case '\u1E52': // Ṓ [LATIN CAPITAL LETTER O WITH MACRON AND ACUTE]
fallthrough
case '\u1ECC': // Ọ [LATIN CAPITAL LETTER O WITH DOT BELOW]
fallthrough
case '\u1ECE': // Ỏ [LATIN CAPITAL LETTER O WITH HOOK ABOVE]
fallthrough
case '\u1ED0': // Ố [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND ACUTE]
fallthrough
case '\u1ED2': // Ồ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND GRAVE]
fallthrough
case '\u1ED4': // Ổ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
fallthrough
case '\u1ED6': // Ỗ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND TILDE]
fallthrough
case '\u1ED8': // Ộ [LATIN CAPITAL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
fallthrough
case '\u1EDA': // Ớ [LATIN CAPITAL LETTER O WITH HORN AND ACUTE]
fallthrough
case '\u1EDC': // Ờ [LATIN CAPITAL LETTER O WITH HORN AND GRAVE]
fallthrough
case '\u1EDE': // Ở [LATIN CAPITAL LETTER O WITH HORN AND HOOK ABOVE]
fallthrough
case '\u1EE0': // Ỡ [LATIN CAPITAL LETTER O WITH HORN AND TILDE]
fallthrough
case '\u1EE2': // Ợ [LATIN CAPITAL LETTER O WITH HORN AND DOT BELOW]
fallthrough
case '\u24C4': // Ⓞ [CIRCLED LATIN CAPITAL LETTER O]
fallthrough
case '\uA74A': // Ꝋ [LATIN CAPITAL LETTER O WITH LONG STROKE OVERLAY]
fallthrough
case '\uA74C': // Ꝍ [LATIN CAPITAL LETTER O WITH LOOP]
fallthrough
case '\uFF2F': // O [FULLWIDTH LATIN CAPITAL LETTER O]
output[outputPos] = 'O'
outputPos++
case '\u00F2': // ò [LATIN SMALL LETTER O WITH GRAVE]
fallthrough
case '\u00F3': // ó [LATIN SMALL LETTER O WITH ACUTE]
fallthrough
case '\u00F4': // ô [LATIN SMALL LETTER O WITH CIRCUMFLEX]
fallthrough
case '\u00F5': // õ [LATIN SMALL LETTER O WITH TILDE]
fallthrough
case '\u00F6': // ö [LATIN SMALL LETTER O WITH DIAERESIS]
fallthrough
case '\u00F8': // ø [LATIN SMALL LETTER O WITH STROKE]
fallthrough
case '\u014D': // ō [LATIN SMALL LETTER O WITH MACRON]
fallthrough
case '\u014F': // ŏ [LATIN SMALL LETTER O WITH BREVE]
fallthrough
case '\u0151': // ő [LATIN SMALL LETTER O WITH DOUBLE ACUTE]
fallthrough
case '\u01A1': // ơ [LATIN SMALL LETTER O WITH HORN]
fallthrough
case '\u01D2': // ǒ [LATIN SMALL LETTER O WITH CARON]
fallthrough
case '\u01EB': // ǫ [LATIN SMALL LETTER O WITH OGONEK]
fallthrough
case '\u01ED': // ǭ [LATIN SMALL LETTER O WITH OGONEK AND MACRON]
fallthrough
case '\u01FF': // ǿ [LATIN SMALL LETTER O WITH STROKE AND ACUTE]
fallthrough
case '\u020D': // ȍ [LATIN SMALL LETTER O WITH DOUBLE GRAVE]
fallthrough
case '\u020F': // ȏ [LATIN SMALL LETTER O WITH INVERTED BREVE]
fallthrough
case '\u022B': // ȫ [LATIN SMALL LETTER O WITH DIAERESIS AND MACRON]
fallthrough
case '\u022D': // ȭ [LATIN SMALL LETTER O WITH TILDE AND MACRON]
fallthrough
case '\u022F': // ȯ [LATIN SMALL LETTER O WITH DOT ABOVE]
fallthrough
case '\u0231': // ȱ [LATIN SMALL LETTER O WITH DOT ABOVE AND MACRON]
fallthrough
case '\u0254': // ɔ [LATIN SMALL LETTER OPEN O]
fallthrough
case '\u0275': // ɵ [LATIN SMALL LETTER BARRED O]
fallthrough
case '\u1D16': // ᴖ [LATIN SMALL LETTER TOP HALF O]
fallthrough
case '\u1D17': // ᴗ [LATIN SMALL LETTER BOTTOM HALF O]
fallthrough
case '\u1D97': // ᶗ [LATIN SMALL LETTER OPEN O WITH RETROFLEX HOOK]
fallthrough
case '\u1E4D': // ṍ [LATIN SMALL LETTER O WITH TILDE AND ACUTE]
fallthrough
case '\u1E4F': // ṏ [LATIN SMALL LETTER O WITH TILDE AND DIAERESIS]
fallthrough
case '\u1E51': // ṑ [LATIN SMALL LETTER O WITH MACRON AND GRAVE]
fallthrough
case '\u1E53': // ṓ [LATIN SMALL LETTER O WITH MACRON AND ACUTE]
fallthrough
case '\u1ECD': // ọ [LATIN SMALL LETTER O WITH DOT BELOW]
fallthrough
case '\u1ECF': // ỏ [LATIN SMALL LETTER O WITH HOOK ABOVE]
fallthrough
case '\u1ED1': // ố [LATIN SMALL LETTER O WITH CIRCUMFLEX AND ACUTE]
fallthrough
case '\u1ED3': // ồ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND GRAVE]
fallthrough
case '\u1ED5': // ổ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND HOOK ABOVE]
fallthrough
case '\u1ED7': // ỗ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND TILDE]
fallthrough
case '\u1ED9': // ộ [LATIN SMALL LETTER O WITH CIRCUMFLEX AND DOT BELOW]
fallthrough
case '\u1EDB': // ớ [LATIN SMALL LETTER O WITH HORN AND ACUTE]
fallthrough
case '\u1EDD': // ờ [LATIN SMALL LETTER O WITH HORN AND GRAVE]
fallthrough
case '\u1EDF': // ở [LATIN SMALL LETTER O WITH HORN AND HOOK ABOVE]
fallthrough
case '\u1EE1': // ỡ [LATIN SMALL LETTER O WITH HORN AND TILDE]
fallthrough
case '\u1EE3': // ợ [LATIN SMALL LETTER O WITH HORN AND DOT BELOW]
fallthrough
case '\u2092': // ₒ [LATIN SUBSCRIPT SMALL LETTER O]
fallthrough
case '\u24DE': // ⓞ [CIRCLED LATIN SMALL LETTER O]
fallthrough
case '\u2C7A': // ⱺ [LATIN SMALL LETTER O WITH LOW RING INSIDE]
fallthrough
case '\uA74B': // ꝋ [LATIN SMALL LETTER O WITH LONG STROKE OVERLAY]
fallthrough
case '\uA74D': // ꝍ [LATIN SMALL LETTER O WITH LOOP]
fallthrough
case '\uFF4F': // o [FULLWIDTH LATIN SMALL LETTER O]
output[outputPos] = 'o'
outputPos++
case '\u0152': // Œ [LATIN CAPITAL LIGATURE OE]
fallthrough
case '\u0276': // ɶ [LATIN LETTER SMALL CAPITAL OE]
output = output[:(len(output) + 1)]
output[outputPos] = 'O'
outputPos++
output[outputPos] = 'E'
outputPos++
case '\uA74E': // Ꝏ [LATIN CAPITAL LETTER OO]
output = output[:(len(output) + 1)]
output[outputPos] = 'O'
outputPos++
output[outputPos] = 'O'
outputPos++
case '\u0222': // Ȣ http://en.wikipedia.org/wiki/OU [LATIN CAPITAL LETTER OU]
fallthrough
case '\u1D15': // ᴕ [LATIN LETTER SMALL CAPITAL OU]
output = output[:(len(output) + 1)]
output[outputPos] = 'O'
outputPos++
output[outputPos] = 'U'
outputPos++
case '\u24AA': // ⒪ [PARENTHESIZED LATIN SMALL LETTER O]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'o'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0153': // œ [LATIN SMALL LIGATURE OE]
fallthrough
case '\u1D14': // ᴔ [LATIN SMALL LETTER TURNED OE]
output = output[:(len(output) + 1)]
output[outputPos] = 'o'
outputPos++
output[outputPos] = 'e'
outputPos++
case '\uA74F': // ꝏ [LATIN SMALL LETTER OO]
output = output[:(len(output) + 1)]
output[outputPos] = 'o'
outputPos++
output[outputPos] = 'o'
outputPos++
case '\u0223': // ȣ http://en.wikipedia.org/wiki/OU [LATIN SMALL LETTER OU]
output = output[:(len(output) + 1)]
output[outputPos] = 'o'
outputPos++
output[outputPos] = 'u'
outputPos++
case '\u01A4': // Ƥ [LATIN CAPITAL LETTER P WITH HOOK]
fallthrough
case '\u1D18': // ᴘ [LATIN LETTER SMALL CAPITAL P]
fallthrough
case '\u1E54': // Ṕ [LATIN CAPITAL LETTER P WITH ACUTE]
fallthrough
case '\u1E56': // Ṗ [LATIN CAPITAL LETTER P WITH DOT ABOVE]
fallthrough
case '\u24C5': // Ⓟ [CIRCLED LATIN CAPITAL LETTER P]
fallthrough
case '\u2C63': // Ᵽ [LATIN CAPITAL LETTER P WITH STROKE]
fallthrough
case '\uA750': // Ꝑ [LATIN CAPITAL LETTER P WITH STROKE THROUGH DESCENDER]
fallthrough
case '\uA752': // Ꝓ [LATIN CAPITAL LETTER P WITH FLOURISH]
fallthrough
case '\uA754': // Ꝕ [LATIN CAPITAL LETTER P WITH SQUIRREL TAIL]
fallthrough
case '\uFF30': // P [FULLWIDTH LATIN CAPITAL LETTER P]
output[outputPos] = 'P'
outputPos++
case '\u01A5': // ƥ [LATIN SMALL LETTER P WITH HOOK]
fallthrough
case '\u1D71': // ᵱ [LATIN SMALL LETTER P WITH MIDDLE TILDE]
fallthrough
case '\u1D7D': // ᵽ [LATIN SMALL LETTER P WITH STROKE]
fallthrough
case '\u1D88': // ᶈ [LATIN SMALL LETTER P WITH PALATAL HOOK]
fallthrough
case '\u1E55': // ṕ [LATIN SMALL LETTER P WITH ACUTE]
fallthrough
case '\u1E57': // ṗ [LATIN SMALL LETTER P WITH DOT ABOVE]
fallthrough
case '\u24DF': // ⓟ [CIRCLED LATIN SMALL LETTER P]
fallthrough
case '\uA751': // ꝑ [LATIN SMALL LETTER P WITH STROKE THROUGH DESCENDER]
fallthrough
case '\uA753': // ꝓ [LATIN SMALL LETTER P WITH FLOURISH]
fallthrough
case '\uA755': // ꝕ [LATIN SMALL LETTER P WITH SQUIRREL TAIL]
fallthrough
case '\uA7FC': // ꟼ [LATIN EPIGRAPHIC LETTER REVERSED P]
fallthrough
case '\uFF50': // p [FULLWIDTH LATIN SMALL LETTER P]
output[outputPos] = 'p'
outputPos++
case '\u24AB': // ⒫ [PARENTHESIZED LATIN SMALL LETTER P]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'p'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u024A': // Ɋ [LATIN CAPITAL LETTER SMALL Q WITH HOOK TAIL]
fallthrough
case '\u24C6': // Ⓠ [CIRCLED LATIN CAPITAL LETTER Q]
fallthrough
case '\uA756': // Ꝗ [LATIN CAPITAL LETTER Q WITH STROKE THROUGH DESCENDER]
fallthrough
case '\uA758': // Ꝙ [LATIN CAPITAL LETTER Q WITH DIAGONAL STROKE]
fallthrough
case '\uFF31': // Q [FULLWIDTH LATIN CAPITAL LETTER Q]
output[outputPos] = 'Q'
outputPos++
case '\u0138': // ĸ http://en.wikipedia.org/wiki/Kra_(letter) [LATIN SMALL LETTER KRA]
fallthrough
case '\u024B': // ɋ [LATIN SMALL LETTER Q WITH HOOK TAIL]
fallthrough
case '\u02A0': // ʠ [LATIN SMALL LETTER Q WITH HOOK]
fallthrough
case '\u24E0': // ⓠ [CIRCLED LATIN SMALL LETTER Q]
fallthrough
case '\uA757': // ꝗ [LATIN SMALL LETTER Q WITH STROKE THROUGH DESCENDER]
fallthrough
case '\uA759': // ꝙ [LATIN SMALL LETTER Q WITH DIAGONAL STROKE]
fallthrough
case '\uFF51': // q [FULLWIDTH LATIN SMALL LETTER Q]
output[outputPos] = 'q'
outputPos++
case '\u24AC': // ⒬ [PARENTHESIZED LATIN SMALL LETTER Q]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'q'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0239': // ȹ [LATIN SMALL LETTER QP DIGRAPH]
output = output[:(len(output) + 1)]
output[outputPos] = 'q'
outputPos++
output[outputPos] = 'p'
outputPos++
case '\u0154': // Ŕ [LATIN CAPITAL LETTER R WITH ACUTE]
fallthrough
case '\u0156': // Ŗ [LATIN CAPITAL LETTER R WITH CEDILLA]
fallthrough
case '\u0158': // Ř [LATIN CAPITAL LETTER R WITH CARON]
fallthrough
case '\u0210': // Ȑ [LATIN CAPITAL LETTER R WITH DOUBLE GRAVE]
fallthrough
case '\u0212': // Ȓ [LATIN CAPITAL LETTER R WITH INVERTED BREVE]
fallthrough
case '\u024C': // Ɍ [LATIN CAPITAL LETTER R WITH STROKE]
fallthrough
case '\u0280': // ʀ [LATIN LETTER SMALL CAPITAL R]
fallthrough
case '\u0281': // ʁ [LATIN LETTER SMALL CAPITAL INVERTED R]
fallthrough
case '\u1D19': // ᴙ [LATIN LETTER SMALL CAPITAL REVERSED R]
fallthrough
case '\u1D1A': // ᴚ [LATIN LETTER SMALL CAPITAL TURNED R]
fallthrough
case '\u1E58': // Ṙ [LATIN CAPITAL LETTER R WITH DOT ABOVE]
fallthrough
case '\u1E5A': // Ṛ [LATIN CAPITAL LETTER R WITH DOT BELOW]
fallthrough
case '\u1E5C': // Ṝ [LATIN CAPITAL LETTER R WITH DOT BELOW AND MACRON]
fallthrough
case '\u1E5E': // Ṟ [LATIN CAPITAL LETTER R WITH LINE BELOW]
fallthrough
case '\u24C7': // Ⓡ [CIRCLED LATIN CAPITAL LETTER R]
fallthrough
case '\u2C64': // Ɽ [LATIN CAPITAL LETTER R WITH TAIL]
fallthrough
case '\uA75A': // Ꝛ [LATIN CAPITAL LETTER R ROTUNDA]
fallthrough
case '\uA782': // Ꞃ [LATIN CAPITAL LETTER INSULAR R]
fallthrough
case '\uFF32': // R [FULLWIDTH LATIN CAPITAL LETTER R]
output[outputPos] = 'R'
outputPos++
case '\u0155': // ŕ [LATIN SMALL LETTER R WITH ACUTE]
fallthrough
case '\u0157': // ŗ [LATIN SMALL LETTER R WITH CEDILLA]
fallthrough
case '\u0159': // ř [LATIN SMALL LETTER R WITH CARON]
fallthrough
case '\u0211': // ȑ [LATIN SMALL LETTER R WITH DOUBLE GRAVE]
fallthrough
case '\u0213': // ȓ [LATIN SMALL LETTER R WITH INVERTED BREVE]
fallthrough
case '\u024D': // ɍ [LATIN SMALL LETTER R WITH STROKE]
fallthrough
case '\u027C': // ɼ [LATIN SMALL LETTER R WITH LONG LEG]
fallthrough
case '\u027D': // ɽ [LATIN SMALL LETTER R WITH TAIL]
fallthrough
case '\u027E': // ɾ [LATIN SMALL LETTER R WITH FISHHOOK]
fallthrough
case '\u027F': // ɿ [LATIN SMALL LETTER REVERSED R WITH FISHHOOK]
fallthrough
case '\u1D63': // ᵣ [LATIN SUBSCRIPT SMALL LETTER R]
fallthrough
case '\u1D72': // ᵲ [LATIN SMALL LETTER R WITH MIDDLE TILDE]
fallthrough
case '\u1D73': // ᵳ [LATIN SMALL LETTER R WITH FISHHOOK AND MIDDLE TILDE]
fallthrough
case '\u1D89': // ᶉ [LATIN SMALL LETTER R WITH PALATAL HOOK]
fallthrough
case '\u1E59': // ṙ [LATIN SMALL LETTER R WITH DOT ABOVE]
fallthrough
case '\u1E5B': // ṛ [LATIN SMALL LETTER R WITH DOT BELOW]
fallthrough
case '\u1E5D': // ṝ [LATIN SMALL LETTER R WITH DOT BELOW AND MACRON]
fallthrough
case '\u1E5F': // ṟ [LATIN SMALL LETTER R WITH LINE BELOW]
fallthrough
case '\u24E1': // ⓡ [CIRCLED LATIN SMALL LETTER R]
fallthrough
case '\uA75B': // ꝛ [LATIN SMALL LETTER R ROTUNDA]
fallthrough
case '\uA783': // ꞃ [LATIN SMALL LETTER INSULAR R]
fallthrough
case '\uFF52': // r [FULLWIDTH LATIN SMALL LETTER R]
output[outputPos] = 'r'
outputPos++
case '\u24AD': // ⒭ [PARENTHESIZED LATIN SMALL LETTER R]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'r'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u015A': // Ś [LATIN CAPITAL LETTER S WITH ACUTE]
fallthrough
case '\u015C': // Ŝ [LATIN CAPITAL LETTER S WITH CIRCUMFLEX]
fallthrough
case '\u015E': // Ş [LATIN CAPITAL LETTER S WITH CEDILLA]
fallthrough
case '\u0160': // Š [LATIN CAPITAL LETTER S WITH CARON]
fallthrough
case '\u0218': // Ș [LATIN CAPITAL LETTER S WITH COMMA BELOW]
fallthrough
case '\u1E60': // Ṡ [LATIN CAPITAL LETTER S WITH DOT ABOVE]
fallthrough
case '\u1E62': // Ṣ [LATIN CAPITAL LETTER S WITH DOT BELOW]
fallthrough
case '\u1E64': // Ṥ [LATIN CAPITAL LETTER S WITH ACUTE AND DOT ABOVE]
fallthrough
case '\u1E66': // Ṧ [LATIN CAPITAL LETTER S WITH CARON AND DOT ABOVE]
fallthrough
case '\u1E68': // Ṩ [LATIN CAPITAL LETTER S WITH DOT BELOW AND DOT ABOVE]
fallthrough
case '\u24C8': // Ⓢ [CIRCLED LATIN CAPITAL LETTER S]
fallthrough
case '\uA731': // ꜱ [LATIN LETTER SMALL CAPITAL S]
fallthrough
case '\uA785': // ꞅ [LATIN SMALL LETTER INSULAR S]
fallthrough
case '\uFF33': // S [FULLWIDTH LATIN CAPITAL LETTER S]
output[outputPos] = 'S'
outputPos++
case '\u015B': // ś [LATIN SMALL LETTER S WITH ACUTE]
fallthrough
case '\u015D': // ŝ [LATIN SMALL LETTER S WITH CIRCUMFLEX]
fallthrough
case '\u015F': // ş [LATIN SMALL LETTER S WITH CEDILLA]
fallthrough
case '\u0161': // š [LATIN SMALL LETTER S WITH CARON]
fallthrough
case '\u017F': // ſ http://en.wikipedia.org/wiki/Long_S [LATIN SMALL LETTER LONG S]
fallthrough
case '\u0219': // ș [LATIN SMALL LETTER S WITH COMMA BELOW]
fallthrough
case '\u023F': // ȿ [LATIN SMALL LETTER S WITH SWASH TAIL]
fallthrough
case '\u0282': // ʂ [LATIN SMALL LETTER S WITH HOOK]
fallthrough
case '\u1D74': // ᵴ [LATIN SMALL LETTER S WITH MIDDLE TILDE]
fallthrough
case '\u1D8A': // ᶊ [LATIN SMALL LETTER S WITH PALATAL HOOK]
fallthrough
case '\u1E61': // ṡ [LATIN SMALL LETTER S WITH DOT ABOVE]
fallthrough
case '\u1E63': // ṣ [LATIN SMALL LETTER S WITH DOT BELOW]
fallthrough
case '\u1E65': // ṥ [LATIN SMALL LETTER S WITH ACUTE AND DOT ABOVE]
fallthrough
case '\u1E67': // ṧ [LATIN SMALL LETTER S WITH CARON AND DOT ABOVE]
fallthrough
case '\u1E69': // ṩ [LATIN SMALL LETTER S WITH DOT BELOW AND DOT ABOVE]
fallthrough
case '\u1E9C': // ẜ [LATIN SMALL LETTER LONG S WITH DIAGONAL STROKE]
fallthrough
case '\u1E9D': // ẝ [LATIN SMALL LETTER LONG S WITH HIGH STROKE]
fallthrough
case '\u24E2': // ⓢ [CIRCLED LATIN SMALL LETTER S]
fallthrough
case '\uA784': // Ꞅ [LATIN CAPITAL LETTER INSULAR S]
fallthrough
case '\uFF53': // s [FULLWIDTH LATIN SMALL LETTER S]
output[outputPos] = 's'
outputPos++
case '\u1E9E': // ẞ [LATIN CAPITAL LETTER SHARP S]
output = output[:(len(output) + 1)]
output[outputPos] = 'S'
outputPos++
output[outputPos] = 'S'
outputPos++
case '\u24AE': // ⒮ [PARENTHESIZED LATIN SMALL LETTER S]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 's'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00DF': // ß [LATIN SMALL LETTER SHARP S]
output = output[:(len(output) + 1)]
output[outputPos] = 's'
outputPos++
output[outputPos] = 's'
outputPos++
case '\uFB06': // st [LATIN SMALL LIGATURE ST]
output = output[:(len(output) + 1)]
output[outputPos] = 's'
outputPos++
output[outputPos] = 't'
outputPos++
case '\u0162': // Ţ [LATIN CAPITAL LETTER T WITH CEDILLA]
fallthrough
case '\u0164': // Ť [LATIN CAPITAL LETTER T WITH CARON]
fallthrough
case '\u0166': // Ŧ [LATIN CAPITAL LETTER T WITH STROKE]
fallthrough
case '\u01AC': // Ƭ [LATIN CAPITAL LETTER T WITH HOOK]
fallthrough
case '\u01AE': // Ʈ [LATIN CAPITAL LETTER T WITH RETROFLEX HOOK]
fallthrough
case '\u021A': // Ț [LATIN CAPITAL LETTER T WITH COMMA BELOW]
fallthrough
case '\u023E': // Ⱦ [LATIN CAPITAL LETTER T WITH DIAGONAL STROKE]
fallthrough
case '\u1D1B': // ᴛ [LATIN LETTER SMALL CAPITAL T]
fallthrough
case '\u1E6A': // Ṫ [LATIN CAPITAL LETTER T WITH DOT ABOVE]
fallthrough
case '\u1E6C': // Ṭ [LATIN CAPITAL LETTER T WITH DOT BELOW]
fallthrough
case '\u1E6E': // Ṯ [LATIN CAPITAL LETTER T WITH LINE BELOW]
fallthrough
case '\u1E70': // Ṱ [LATIN CAPITAL LETTER T WITH CIRCUMFLEX BELOW]
fallthrough
case '\u24C9': // Ⓣ [CIRCLED LATIN CAPITAL LETTER T]
fallthrough
case '\uA786': // Ꞇ [LATIN CAPITAL LETTER INSULAR T]
fallthrough
case '\uFF34': // T [FULLWIDTH LATIN CAPITAL LETTER T]
output[outputPos] = 'T'
outputPos++
case '\u0163': // ţ [LATIN SMALL LETTER T WITH CEDILLA]
fallthrough
case '\u0165': // ť [LATIN SMALL LETTER T WITH CARON]
fallthrough
case '\u0167': // ŧ [LATIN SMALL LETTER T WITH STROKE]
fallthrough
case '\u01AB': // ƫ [LATIN SMALL LETTER T WITH PALATAL HOOK]
fallthrough
case '\u01AD': // ƭ [LATIN SMALL LETTER T WITH HOOK]
fallthrough
case '\u021B': // ț [LATIN SMALL LETTER T WITH COMMA BELOW]
fallthrough
case '\u0236': // ȶ [LATIN SMALL LETTER T WITH CURL]
fallthrough
case '\u0287': // ʇ [LATIN SMALL LETTER TURNED T]
fallthrough
case '\u0288': // ʈ [LATIN SMALL LETTER T WITH RETROFLEX HOOK]
fallthrough
case '\u1D75': // ᵵ [LATIN SMALL LETTER T WITH MIDDLE TILDE]
fallthrough
case '\u1E6B': // ṫ [LATIN SMALL LETTER T WITH DOT ABOVE]
fallthrough
case '\u1E6D': // ṭ [LATIN SMALL LETTER T WITH DOT BELOW]
fallthrough
case '\u1E6F': // ṯ [LATIN SMALL LETTER T WITH LINE BELOW]
fallthrough
case '\u1E71': // ṱ [LATIN SMALL LETTER T WITH CIRCUMFLEX BELOW]
fallthrough
case '\u1E97': // ẗ [LATIN SMALL LETTER T WITH DIAERESIS]
fallthrough
case '\u24E3': // ⓣ [CIRCLED LATIN SMALL LETTER T]
fallthrough
case '\u2C66': // ⱦ [LATIN SMALL LETTER T WITH DIAGONAL STROKE]
fallthrough
case '\uFF54': // t [FULLWIDTH LATIN SMALL LETTER T]
output[outputPos] = 't'
outputPos++
case '\u00DE': // Þ [LATIN CAPITAL LETTER THORN]
fallthrough
case '\uA766': // Ꝧ [LATIN CAPITAL LETTER THORN WITH STROKE THROUGH DESCENDER]
output = output[:(len(output) + 1)]
output[outputPos] = 'T'
outputPos++
output[outputPos] = 'H'
outputPos++
case '\uA728': // Ꜩ [LATIN CAPITAL LETTER TZ]
output = output[:(len(output) + 1)]
output[outputPos] = 'T'
outputPos++
output[outputPos] = 'Z'
outputPos++
case '\u24AF': // ⒯ [PARENTHESIZED LATIN SMALL LETTER T]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 't'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u02A8': // ʨ [LATIN SMALL LETTER TC DIGRAPH WITH CURL]
output = output[:(len(output) + 1)]
output[outputPos] = 't'
outputPos++
output[outputPos] = 'c'
outputPos++
case '\u00FE': // þ [LATIN SMALL LETTER THORN]
fallthrough
case '\u1D7A': // ᵺ [LATIN SMALL LETTER TH WITH STRIKETHROUGH]
fallthrough
case '\uA767': // ꝧ [LATIN SMALL LETTER THORN WITH STROKE THROUGH DESCENDER]
output = output[:(len(output) + 1)]
output[outputPos] = 't'
outputPos++
output[outputPos] = 'h'
outputPos++
case '\u02A6': // ʦ [LATIN SMALL LETTER TS DIGRAPH]
output = output[:(len(output) + 1)]
output[outputPos] = 't'
outputPos++
output[outputPos] = 's'
outputPos++
case '\uA729': // ꜩ [LATIN SMALL LETTER TZ]
output = output[:(len(output) + 1)]
output[outputPos] = 't'
outputPos++
output[outputPos] = 'z'
outputPos++
case '\u00D9': // Ù [LATIN CAPITAL LETTER U WITH GRAVE]
fallthrough
case '\u00DA': // Ú [LATIN CAPITAL LETTER U WITH ACUTE]
fallthrough
case '\u00DB': // Û [LATIN CAPITAL LETTER U WITH CIRCUMFLEX]
fallthrough
case '\u00DC': // Ü [LATIN CAPITAL LETTER U WITH DIAERESIS]
fallthrough
case '\u0168': // Ũ [LATIN CAPITAL LETTER U WITH TILDE]
fallthrough
case '\u016A': // Ū [LATIN CAPITAL LETTER U WITH MACRON]
fallthrough
case '\u016C': // Ŭ [LATIN CAPITAL LETTER U WITH BREVE]
fallthrough
case '\u016E': // Ů [LATIN CAPITAL LETTER U WITH RING ABOVE]
fallthrough
case '\u0170': // Ű [LATIN CAPITAL LETTER U WITH DOUBLE ACUTE]
fallthrough
case '\u0172': // Ų [LATIN CAPITAL LETTER U WITH OGONEK]
fallthrough
case '\u01AF': // Ư [LATIN CAPITAL LETTER U WITH HORN]
fallthrough
case '\u01D3': // Ǔ [LATIN CAPITAL LETTER U WITH CARON]
fallthrough
case '\u01D5': // Ǖ [LATIN CAPITAL LETTER U WITH DIAERESIS AND MACRON]
fallthrough
case '\u01D7': // Ǘ [LATIN CAPITAL LETTER U WITH DIAERESIS AND ACUTE]
fallthrough
case '\u01D9': // Ǚ [LATIN CAPITAL LETTER U WITH DIAERESIS AND CARON]
fallthrough
case '\u01DB': // Ǜ [LATIN CAPITAL LETTER U WITH DIAERESIS AND GRAVE]
fallthrough
case '\u0214': // Ȕ [LATIN CAPITAL LETTER U WITH DOUBLE GRAVE]
fallthrough
case '\u0216': // Ȗ [LATIN CAPITAL LETTER U WITH INVERTED BREVE]
fallthrough
case '\u0244': // Ʉ [LATIN CAPITAL LETTER U BAR]
fallthrough
case '\u1D1C': // ᴜ [LATIN LETTER SMALL CAPITAL U]
fallthrough
case '\u1D7E': // ᵾ [LATIN SMALL CAPITAL LETTER U WITH STROKE]
fallthrough
case '\u1E72': // Ṳ [LATIN CAPITAL LETTER U WITH DIAERESIS BELOW]
fallthrough
case '\u1E74': // Ṵ [LATIN CAPITAL LETTER U WITH TILDE BELOW]
fallthrough
case '\u1E76': // Ṷ [LATIN CAPITAL LETTER U WITH CIRCUMFLEX BELOW]
fallthrough
case '\u1E78': // Ṹ [LATIN CAPITAL LETTER U WITH TILDE AND ACUTE]
fallthrough
case '\u1E7A': // Ṻ [LATIN CAPITAL LETTER U WITH MACRON AND DIAERESIS]
fallthrough
case '\u1EE4': // Ụ [LATIN CAPITAL LETTER U WITH DOT BELOW]
fallthrough
case '\u1EE6': // Ủ [LATIN CAPITAL LETTER U WITH HOOK ABOVE]
fallthrough
case '\u1EE8': // Ứ [LATIN CAPITAL LETTER U WITH HORN AND ACUTE]
fallthrough
case '\u1EEA': // Ừ [LATIN CAPITAL LETTER U WITH HORN AND GRAVE]
fallthrough
case '\u1EEC': // Ử [LATIN CAPITAL LETTER U WITH HORN AND HOOK ABOVE]
fallthrough
case '\u1EEE': // Ữ [LATIN CAPITAL LETTER U WITH HORN AND TILDE]
fallthrough
case '\u1EF0': // Ự [LATIN CAPITAL LETTER U WITH HORN AND DOT BELOW]
fallthrough
case '\u24CA': // Ⓤ [CIRCLED LATIN CAPITAL LETTER U]
fallthrough
case '\uFF35': // U [FULLWIDTH LATIN CAPITAL LETTER U]
output[outputPos] = 'U'
outputPos++
case '\u00F9': // ù [LATIN SMALL LETTER U WITH GRAVE]
fallthrough
case '\u00FA': // ú [LATIN SMALL LETTER U WITH ACUTE]
fallthrough
case '\u00FB': // û [LATIN SMALL LETTER U WITH CIRCUMFLEX]
fallthrough
case '\u00FC': // ü [LATIN SMALL LETTER U WITH DIAERESIS]
fallthrough
case '\u0169': // ũ [LATIN SMALL LETTER U WITH TILDE]
fallthrough
case '\u016B': // ū [LATIN SMALL LETTER U WITH MACRON]
fallthrough
case '\u016D': // ŭ [LATIN SMALL LETTER U WITH BREVE]
fallthrough
case '\u016F': // ů [LATIN SMALL LETTER U WITH RING ABOVE]
fallthrough
case '\u0171': // ű [LATIN SMALL LETTER U WITH DOUBLE ACUTE]
fallthrough
case '\u0173': // ų [LATIN SMALL LETTER U WITH OGONEK]
fallthrough
case '\u01B0': // ư [LATIN SMALL LETTER U WITH HORN]
fallthrough
case '\u01D4': // ǔ [LATIN SMALL LETTER U WITH CARON]
fallthrough
case '\u01D6': // ǖ [LATIN SMALL LETTER U WITH DIAERESIS AND MACRON]
fallthrough
case '\u01D8': // ǘ [LATIN SMALL LETTER U WITH DIAERESIS AND ACUTE]
fallthrough
case '\u01DA': // ǚ [LATIN SMALL LETTER U WITH DIAERESIS AND CARON]
fallthrough
case '\u01DC': // ǜ [LATIN SMALL LETTER U WITH DIAERESIS AND GRAVE]
fallthrough
case '\u0215': // ȕ [LATIN SMALL LETTER U WITH DOUBLE GRAVE]
fallthrough
case '\u0217': // ȗ [LATIN SMALL LETTER U WITH INVERTED BREVE]
fallthrough
case '\u0289': // ʉ [LATIN SMALL LETTER U BAR]
fallthrough
case '\u1D64': // ᵤ [LATIN SUBSCRIPT SMALL LETTER U]
fallthrough
case '\u1D99': // ᶙ [LATIN SMALL LETTER U WITH RETROFLEX HOOK]
fallthrough
case '\u1E73': // ṳ [LATIN SMALL LETTER U WITH DIAERESIS BELOW]
fallthrough
case '\u1E75': // ṵ [LATIN SMALL LETTER U WITH TILDE BELOW]
fallthrough
case '\u1E77': // ṷ [LATIN SMALL LETTER U WITH CIRCUMFLEX BELOW]
fallthrough
case '\u1E79': // ṹ [LATIN SMALL LETTER U WITH TILDE AND ACUTE]
fallthrough
case '\u1E7B': // ṻ [LATIN SMALL LETTER U WITH MACRON AND DIAERESIS]
fallthrough
case '\u1EE5': // ụ [LATIN SMALL LETTER U WITH DOT BELOW]
fallthrough
case '\u1EE7': // ủ [LATIN SMALL LETTER U WITH HOOK ABOVE]
fallthrough
case '\u1EE9': // ứ [LATIN SMALL LETTER U WITH HORN AND ACUTE]
fallthrough
case '\u1EEB': // ừ [LATIN SMALL LETTER U WITH HORN AND GRAVE]
fallthrough
case '\u1EED': // ử [LATIN SMALL LETTER U WITH HORN AND HOOK ABOVE]
fallthrough
case '\u1EEF': // ữ [LATIN SMALL LETTER U WITH HORN AND TILDE]
fallthrough
case '\u1EF1': // ự [LATIN SMALL LETTER U WITH HORN AND DOT BELOW]
fallthrough
case '\u24E4': // ⓤ [CIRCLED LATIN SMALL LETTER U]
fallthrough
case '\uFF55': // u [FULLWIDTH LATIN SMALL LETTER U]
output[outputPos] = 'u'
outputPos++
case '\u24B0': // ⒰ [PARENTHESIZED LATIN SMALL LETTER U]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'u'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u1D6B': // ᵫ [LATIN SMALL LETTER UE]
output = output[:(len(output) + 1)]
output[outputPos] = 'u'
outputPos++
output[outputPos] = 'e'
outputPos++
case '\u01B2': // Ʋ [LATIN CAPITAL LETTER V WITH HOOK]
fallthrough
case '\u0245': // Ʌ [LATIN CAPITAL LETTER TURNED V]
fallthrough
case '\u1D20': // ᴠ [LATIN LETTER SMALL CAPITAL V]
fallthrough
case '\u1E7C': // Ṽ [LATIN CAPITAL LETTER V WITH TILDE]
fallthrough
case '\u1E7E': // Ṿ [LATIN CAPITAL LETTER V WITH DOT BELOW]
fallthrough
case '\u1EFC': // Ỽ [LATIN CAPITAL LETTER MIDDLE-WELSH V]
fallthrough
case '\u24CB': // Ⓥ [CIRCLED LATIN CAPITAL LETTER V]
fallthrough
case '\uA75E': // Ꝟ [LATIN CAPITAL LETTER V WITH DIAGONAL STROKE]
fallthrough
case '\uA768': // Ꝩ [LATIN CAPITAL LETTER VEND]
fallthrough
case '\uFF36': // V [FULLWIDTH LATIN CAPITAL LETTER V]
output[outputPos] = 'V'
outputPos++
case '\u028B': // ʋ [LATIN SMALL LETTER V WITH HOOK]
fallthrough
case '\u028C': // ʌ [LATIN SMALL LETTER TURNED V]
fallthrough
case '\u1D65': // ᵥ [LATIN SUBSCRIPT SMALL LETTER V]
fallthrough
case '\u1D8C': // ᶌ [LATIN SMALL LETTER V WITH PALATAL HOOK]
fallthrough
case '\u1E7D': // ṽ [LATIN SMALL LETTER V WITH TILDE]
fallthrough
case '\u1E7F': // ṿ [LATIN SMALL LETTER V WITH DOT BELOW]
fallthrough
case '\u24E5': // ⓥ [CIRCLED LATIN SMALL LETTER V]
fallthrough
case '\u2C71': // ⱱ [LATIN SMALL LETTER V WITH RIGHT HOOK]
fallthrough
case '\u2C74': // ⱴ [LATIN SMALL LETTER V WITH CURL]
fallthrough
case '\uA75F': // ꝟ [LATIN SMALL LETTER V WITH DIAGONAL STROKE]
fallthrough
case '\uFF56': // v [FULLWIDTH LATIN SMALL LETTER V]
output[outputPos] = 'v'
outputPos++
case '\uA760': // Ꝡ [LATIN CAPITAL LETTER VY]
output = output[:(len(output) + 1)]
output[outputPos] = 'V'
outputPos++
output[outputPos] = 'Y'
outputPos++
case '\u24B1': // ⒱ [PARENTHESIZED LATIN SMALL LETTER V]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'v'
outputPos++
output[outputPos] = ')'
outputPos++
case '\uA761': // ꝡ [LATIN SMALL LETTER VY]
output = output[:(len(output) + 1)]
output[outputPos] = 'v'
outputPos++
output[outputPos] = 'y'
outputPos++
case '\u0174': // Ŵ [LATIN CAPITAL LETTER W WITH CIRCUMFLEX]
fallthrough
case '\u01F7': // Ƿ http://en.wikipedia.org/wiki/Wynn [LATIN CAPITAL LETTER WYNN]
fallthrough
case '\u1D21': // ᴡ [LATIN LETTER SMALL CAPITAL W]
fallthrough
case '\u1E80': // Ẁ [LATIN CAPITAL LETTER W WITH GRAVE]
fallthrough
case '\u1E82': // Ẃ [LATIN CAPITAL LETTER W WITH ACUTE]
fallthrough
case '\u1E84': // Ẅ [LATIN CAPITAL LETTER W WITH DIAERESIS]
fallthrough
case '\u1E86': // Ẇ [LATIN CAPITAL LETTER W WITH DOT ABOVE]
fallthrough
case '\u1E88': // Ẉ [LATIN CAPITAL LETTER W WITH DOT BELOW]
fallthrough
case '\u24CC': // Ⓦ [CIRCLED LATIN CAPITAL LETTER W]
fallthrough
case '\u2C72': // Ⱳ [LATIN CAPITAL LETTER W WITH HOOK]
fallthrough
case '\uFF37': // W [FULLWIDTH LATIN CAPITAL LETTER W]
output[outputPos] = 'W'
outputPos++
case '\u0175': // ŵ [LATIN SMALL LETTER W WITH CIRCUMFLEX]
fallthrough
case '\u01BF': // ƿ http://en.wikipedia.org/wiki/Wynn [LATIN LETTER WYNN]
fallthrough
case '\u028D': // ʍ [LATIN SMALL LETTER TURNED W]
fallthrough
case '\u1E81': // ẁ [LATIN SMALL LETTER W WITH GRAVE]
fallthrough
case '\u1E83': // ẃ [LATIN SMALL LETTER W WITH ACUTE]
fallthrough
case '\u1E85': // ẅ [LATIN SMALL LETTER W WITH DIAERESIS]
fallthrough
case '\u1E87': // ẇ [LATIN SMALL LETTER W WITH DOT ABOVE]
fallthrough
case '\u1E89': // ẉ [LATIN SMALL LETTER W WITH DOT BELOW]
fallthrough
case '\u1E98': // ẘ [LATIN SMALL LETTER W WITH RING ABOVE]
fallthrough
case '\u24E6': // ⓦ [CIRCLED LATIN SMALL LETTER W]
fallthrough
case '\u2C73': // ⱳ [LATIN SMALL LETTER W WITH HOOK]
fallthrough
case '\uFF57': // w [FULLWIDTH LATIN SMALL LETTER W]
output[outputPos] = 'w'
outputPos++
case '\u24B2': // ⒲ [PARENTHESIZED LATIN SMALL LETTER W]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'w'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u1E8A': // Ẋ [LATIN CAPITAL LETTER X WITH DOT ABOVE]
fallthrough
case '\u1E8C': // Ẍ [LATIN CAPITAL LETTER X WITH DIAERESIS]
fallthrough
case '\u24CD': // Ⓧ [CIRCLED LATIN CAPITAL LETTER X]
fallthrough
case '\uFF38': // X [FULLWIDTH LATIN CAPITAL LETTER X]
output[outputPos] = 'X'
outputPos++
case '\u1D8D': // ᶍ [LATIN SMALL LETTER X WITH PALATAL HOOK]
fallthrough
case '\u1E8B': // ẋ [LATIN SMALL LETTER X WITH DOT ABOVE]
fallthrough
case '\u1E8D': // ẍ [LATIN SMALL LETTER X WITH DIAERESIS]
fallthrough
case '\u2093': // ₓ [LATIN SUBSCRIPT SMALL LETTER X]
fallthrough
case '\u24E7': // ⓧ [CIRCLED LATIN SMALL LETTER X]
fallthrough
case '\uFF58': // x [FULLWIDTH LATIN SMALL LETTER X]
output[outputPos] = 'x'
outputPos++
case '\u24B3': // ⒳ [PARENTHESIZED LATIN SMALL LETTER X]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'x'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00DD': // Ý [LATIN CAPITAL LETTER Y WITH ACUTE]
fallthrough
case '\u0176': // Ŷ [LATIN CAPITAL LETTER Y WITH CIRCUMFLEX]
fallthrough
case '\u0178': // Ÿ [LATIN CAPITAL LETTER Y WITH DIAERESIS]
fallthrough
case '\u01B3': // Ƴ [LATIN CAPITAL LETTER Y WITH HOOK]
fallthrough
case '\u0232': // Ȳ [LATIN CAPITAL LETTER Y WITH MACRON]
fallthrough
case '\u024E': // Ɏ [LATIN CAPITAL LETTER Y WITH STROKE]
fallthrough
case '\u028F': // ʏ [LATIN LETTER SMALL CAPITAL Y]
fallthrough
case '\u1E8E': // Ẏ [LATIN CAPITAL LETTER Y WITH DOT ABOVE]
fallthrough
case '\u1EF2': // Ỳ [LATIN CAPITAL LETTER Y WITH GRAVE]
fallthrough
case '\u1EF4': // Ỵ [LATIN CAPITAL LETTER Y WITH DOT BELOW]
fallthrough
case '\u1EF6': // Ỷ [LATIN CAPITAL LETTER Y WITH HOOK ABOVE]
fallthrough
case '\u1EF8': // Ỹ [LATIN CAPITAL LETTER Y WITH TILDE]
fallthrough
case '\u1EFE': // Ỿ [LATIN CAPITAL LETTER Y WITH LOOP]
fallthrough
case '\u24CE': // Ⓨ [CIRCLED LATIN CAPITAL LETTER Y]
fallthrough
case '\uFF39': // Y [FULLWIDTH LATIN CAPITAL LETTER Y]
output[outputPos] = 'Y'
outputPos++
case '\u00FD': // ý [LATIN SMALL LETTER Y WITH ACUTE]
fallthrough
case '\u00FF': // ÿ [LATIN SMALL LETTER Y WITH DIAERESIS]
fallthrough
case '\u0177': // ŷ [LATIN SMALL LETTER Y WITH CIRCUMFLEX]
fallthrough
case '\u01B4': // ƴ [LATIN SMALL LETTER Y WITH HOOK]
fallthrough
case '\u0233': // ȳ [LATIN SMALL LETTER Y WITH MACRON]
fallthrough
case '\u024F': // ɏ [LATIN SMALL LETTER Y WITH STROKE]
fallthrough
case '\u028E': // ʎ [LATIN SMALL LETTER TURNED Y]
fallthrough
case '\u1E8F': // ẏ [LATIN SMALL LETTER Y WITH DOT ABOVE]
fallthrough
case '\u1E99': // ẙ [LATIN SMALL LETTER Y WITH RING ABOVE]
fallthrough
case '\u1EF3': // ỳ [LATIN SMALL LETTER Y WITH GRAVE]
fallthrough
case '\u1EF5': // ỵ [LATIN SMALL LETTER Y WITH DOT BELOW]
fallthrough
case '\u1EF7': // ỷ [LATIN SMALL LETTER Y WITH HOOK ABOVE]
fallthrough
case '\u1EF9': // ỹ [LATIN SMALL LETTER Y WITH TILDE]
fallthrough
case '\u1EFF': // ỿ [LATIN SMALL LETTER Y WITH LOOP]
fallthrough
case '\u24E8': // ⓨ [CIRCLED LATIN SMALL LETTER Y]
fallthrough
case '\uFF59': // y [FULLWIDTH LATIN SMALL LETTER Y]
output[outputPos] = 'y'
outputPos++
case '\u24B4': // ⒴ [PARENTHESIZED LATIN SMALL LETTER Y]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'y'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u0179': // Ź [LATIN CAPITAL LETTER Z WITH ACUTE]
fallthrough
case '\u017B': // Ż [LATIN CAPITAL LETTER Z WITH DOT ABOVE]
fallthrough
case '\u017D': // Ž [LATIN CAPITAL LETTER Z WITH CARON]
fallthrough
case '\u01B5': // Ƶ [LATIN CAPITAL LETTER Z WITH STROKE]
fallthrough
case '\u021C': // Ȝ http://en.wikipedia.org/wiki/Yogh [LATIN CAPITAL LETTER YOGH]
fallthrough
case '\u0224': // Ȥ [LATIN CAPITAL LETTER Z WITH HOOK]
fallthrough
case '\u1D22': // ᴢ [LATIN LETTER SMALL CAPITAL Z]
fallthrough
case '\u1E90': // Ẑ [LATIN CAPITAL LETTER Z WITH CIRCUMFLEX]
fallthrough
case '\u1E92': // Ẓ [LATIN CAPITAL LETTER Z WITH DOT BELOW]
fallthrough
case '\u1E94': // Ẕ [LATIN CAPITAL LETTER Z WITH LINE BELOW]
fallthrough
case '\u24CF': // Ⓩ [CIRCLED LATIN CAPITAL LETTER Z]
fallthrough
case '\u2C6B': // Ⱬ [LATIN CAPITAL LETTER Z WITH DESCENDER]
fallthrough
case '\uA762': // Ꝣ [LATIN CAPITAL LETTER VISIGOTHIC Z]
fallthrough
case '\uFF3A': // Z [FULLWIDTH LATIN CAPITAL LETTER Z]
output[outputPos] = 'Z'
outputPos++
case '\u017A': // ź [LATIN SMALL LETTER Z WITH ACUTE]
fallthrough
case '\u017C': // ż [LATIN SMALL LETTER Z WITH DOT ABOVE]
fallthrough
case '\u017E': // ž [LATIN SMALL LETTER Z WITH CARON]
fallthrough
case '\u01B6': // ƶ [LATIN SMALL LETTER Z WITH STROKE]
fallthrough
case '\u021D': // ȝ http://en.wikipedia.org/wiki/Yogh [LATIN SMALL LETTER YOGH]
fallthrough
case '\u0225': // ȥ [LATIN SMALL LETTER Z WITH HOOK]
fallthrough
case '\u0240': // ɀ [LATIN SMALL LETTER Z WITH SWASH TAIL]
fallthrough
case '\u0290': // ʐ [LATIN SMALL LETTER Z WITH RETROFLEX HOOK]
fallthrough
case '\u0291': // ʑ [LATIN SMALL LETTER Z WITH CURL]
fallthrough
case '\u1D76': // ᵶ [LATIN SMALL LETTER Z WITH MIDDLE TILDE]
fallthrough
case '\u1D8E': // ᶎ [LATIN SMALL LETTER Z WITH PALATAL HOOK]
fallthrough
case '\u1E91': // ẑ [LATIN SMALL LETTER Z WITH CIRCUMFLEX]
fallthrough
case '\u1E93': // ẓ [LATIN SMALL LETTER Z WITH DOT BELOW]
fallthrough
case '\u1E95': // ẕ [LATIN SMALL LETTER Z WITH LINE BELOW]
fallthrough
case '\u24E9': // ⓩ [CIRCLED LATIN SMALL LETTER Z]
fallthrough
case '\u2C6C': // ⱬ [LATIN SMALL LETTER Z WITH DESCENDER]
fallthrough
case '\uA763': // ꝣ [LATIN SMALL LETTER VISIGOTHIC Z]
fallthrough
case '\uFF5A': // z [FULLWIDTH LATIN SMALL LETTER Z]
output[outputPos] = 'z'
outputPos++
case '\u24B5': // ⒵ [PARENTHESIZED LATIN SMALL LETTER Z]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = 'z'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2070': // ⁰ [SUPERSCRIPT ZERO]
fallthrough
case '\u2080': // ₀ [SUBSCRIPT ZERO]
fallthrough
case '\u24EA': // ⓪ [CIRCLED DIGIT ZERO]
fallthrough
case '\u24FF': // ⓿ [NEGATIVE CIRCLED DIGIT ZERO]
fallthrough
case '\uFF10': // 0 [FULLWIDTH DIGIT ZERO]
output[outputPos] = '0'
outputPos++
case '\u00B9': // ¹ [SUPERSCRIPT ONE]
fallthrough
case '\u2081': // ₁ [SUBSCRIPT ONE]
fallthrough
case '\u2460': // ① [CIRCLED DIGIT ONE]
fallthrough
case '\u24F5': // ⓵ [DOUBLE CIRCLED DIGIT ONE]
fallthrough
case '\u2776': // ❶ [DINGBAT NEGATIVE CIRCLED DIGIT ONE]
fallthrough
case '\u2780': // ➀ [DINGBAT CIRCLED SANS-SERIF DIGIT ONE]
fallthrough
case '\u278A': // ➊ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT ONE]
fallthrough
case '\uFF11': // 1 [FULLWIDTH DIGIT ONE]
output[outputPos] = '1'
outputPos++
case '\u2488': // ⒈ [DIGIT ONE FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2474': // ⑴ [PARENTHESIZED DIGIT ONE]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00B2': // ² [SUPERSCRIPT TWO]
fallthrough
case '\u2082': // ₂ [SUBSCRIPT TWO]
fallthrough
case '\u2461': // ② [CIRCLED DIGIT TWO]
fallthrough
case '\u24F6': // ⓶ [DOUBLE CIRCLED DIGIT TWO]
fallthrough
case '\u2777': // ❷ [DINGBAT NEGATIVE CIRCLED DIGIT TWO]
fallthrough
case '\u2781': // ➁ [DINGBAT CIRCLED SANS-SERIF DIGIT TWO]
fallthrough
case '\u278B': // ➋ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT TWO]
fallthrough
case '\uFF12': // 2 [FULLWIDTH DIGIT TWO]
output[outputPos] = '2'
outputPos++
case '\u2489': // ⒉ [DIGIT TWO FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '2'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2475': // ⑵ [PARENTHESIZED DIGIT TWO]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '2'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00B3': // ³ [SUPERSCRIPT THREE]
fallthrough
case '\u2083': // ₃ [SUBSCRIPT THREE]
fallthrough
case '\u2462': // ③ [CIRCLED DIGIT THREE]
fallthrough
case '\u24F7': // ⓷ [DOUBLE CIRCLED DIGIT THREE]
fallthrough
case '\u2778': // ❸ [DINGBAT NEGATIVE CIRCLED DIGIT THREE]
fallthrough
case '\u2782': // ➂ [DINGBAT CIRCLED SANS-SERIF DIGIT THREE]
fallthrough
case '\u278C': // ➌ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT THREE]
fallthrough
case '\uFF13': // 3 [FULLWIDTH DIGIT THREE]
output[outputPos] = '3'
outputPos++
case '\u248A': // ⒊ [DIGIT THREE FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '3'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2476': // ⑶ [PARENTHESIZED DIGIT THREE]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '3'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2074': // ⁴ [SUPERSCRIPT FOUR]
fallthrough
case '\u2084': // ₄ [SUBSCRIPT FOUR]
fallthrough
case '\u2463': // ④ [CIRCLED DIGIT FOUR]
fallthrough
case '\u24F8': // ⓸ [DOUBLE CIRCLED DIGIT FOUR]
fallthrough
case '\u2779': // ❹ [DINGBAT NEGATIVE CIRCLED DIGIT FOUR]
fallthrough
case '\u2783': // ➃ [DINGBAT CIRCLED SANS-SERIF DIGIT FOUR]
fallthrough
case '\u278D': // ➍ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FOUR]
fallthrough
case '\uFF14': // 4 [FULLWIDTH DIGIT FOUR]
output[outputPos] = '4'
outputPos++
case '\u248B': // ⒋ [DIGIT FOUR FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '4'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2477': // ⑷ [PARENTHESIZED DIGIT FOUR]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '4'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2075': // ⁵ [SUPERSCRIPT FIVE]
fallthrough
case '\u2085': // ₅ [SUBSCRIPT FIVE]
fallthrough
case '\u2464': // ⑤ [CIRCLED DIGIT FIVE]
fallthrough
case '\u24F9': // ⓹ [DOUBLE CIRCLED DIGIT FIVE]
fallthrough
case '\u277A': // ❺ [DINGBAT NEGATIVE CIRCLED DIGIT FIVE]
fallthrough
case '\u2784': // ➄ [DINGBAT CIRCLED SANS-SERIF DIGIT FIVE]
fallthrough
case '\u278E': // ➎ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT FIVE]
fallthrough
case '\uFF15': // 5 [FULLWIDTH DIGIT FIVE]
output[outputPos] = '5'
outputPos++
case '\u248C': // ⒌ [DIGIT FIVE FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '5'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2478': // ⑸ [PARENTHESIZED DIGIT FIVE]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '5'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2076': // ⁶ [SUPERSCRIPT SIX]
fallthrough
case '\u2086': // ₆ [SUBSCRIPT SIX]
fallthrough
case '\u2465': // ⑥ [CIRCLED DIGIT SIX]
fallthrough
case '\u24FA': // ⓺ [DOUBLE CIRCLED DIGIT SIX]
fallthrough
case '\u277B': // ❻ [DINGBAT NEGATIVE CIRCLED DIGIT SIX]
fallthrough
case '\u2785': // ➅ [DINGBAT CIRCLED SANS-SERIF DIGIT SIX]
fallthrough
case '\u278F': // ➏ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SIX]
fallthrough
case '\uFF16': // 6 [FULLWIDTH DIGIT SIX]
output[outputPos] = '6'
outputPos++
case '\u248D': // ⒍ [DIGIT SIX FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '6'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2479': // ⑹ [PARENTHESIZED DIGIT SIX]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '6'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2077': // ⁷ [SUPERSCRIPT SEVEN]
fallthrough
case '\u2087': // ₇ [SUBSCRIPT SEVEN]
fallthrough
case '\u2466': // ⑦ [CIRCLED DIGIT SEVEN]
fallthrough
case '\u24FB': // ⓻ [DOUBLE CIRCLED DIGIT SEVEN]
fallthrough
case '\u277C': // ❼ [DINGBAT NEGATIVE CIRCLED DIGIT SEVEN]
fallthrough
case '\u2786': // ➆ [DINGBAT CIRCLED SANS-SERIF DIGIT SEVEN]
fallthrough
case '\u2790': // ➐ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT SEVEN]
fallthrough
case '\uFF17': // 7 [FULLWIDTH DIGIT SEVEN]
output[outputPos] = '7'
outputPos++
case '\u248E': // ⒎ [DIGIT SEVEN FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '7'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u247A': // ⑺ [PARENTHESIZED DIGIT SEVEN]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '7'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2078': // ⁸ [SUPERSCRIPT EIGHT]
fallthrough
case '\u2088': // ₈ [SUBSCRIPT EIGHT]
fallthrough
case '\u2467': // ⑧ [CIRCLED DIGIT EIGHT]
fallthrough
case '\u24FC': // ⓼ [DOUBLE CIRCLED DIGIT EIGHT]
fallthrough
case '\u277D': // ❽ [DINGBAT NEGATIVE CIRCLED DIGIT EIGHT]
fallthrough
case '\u2787': // ➇ [DINGBAT CIRCLED SANS-SERIF DIGIT EIGHT]
fallthrough
case '\u2791': // ➑ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT EIGHT]
fallthrough
case '\uFF18': // 8 [FULLWIDTH DIGIT EIGHT]
output[outputPos] = '8'
outputPos++
case '\u248F': // ⒏ [DIGIT EIGHT FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '8'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u247B': // ⑻ [PARENTHESIZED DIGIT EIGHT]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '8'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2079': // ⁹ [SUPERSCRIPT NINE]
fallthrough
case '\u2089': // ₉ [SUBSCRIPT NINE]
fallthrough
case '\u2468': // ⑨ [CIRCLED DIGIT NINE]
fallthrough
case '\u24FD': // ⓽ [DOUBLE CIRCLED DIGIT NINE]
fallthrough
case '\u277E': // ❾ [DINGBAT NEGATIVE CIRCLED DIGIT NINE]
fallthrough
case '\u2788': // ➈ [DINGBAT CIRCLED SANS-SERIF DIGIT NINE]
fallthrough
case '\u2792': // ➒ [DINGBAT NEGATIVE CIRCLED SANS-SERIF DIGIT NINE]
fallthrough
case '\uFF19': // 9 [FULLWIDTH DIGIT NINE]
output[outputPos] = '9'
outputPos++
case '\u2490': // ⒐ [DIGIT NINE FULL STOP]
output = output[:(len(output) + 1)]
output[outputPos] = '9'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u247C': // ⑼ [PARENTHESIZED DIGIT NINE]
output = output[:(len(output) + 2)]
output[outputPos] = '('
outputPos++
output[outputPos] = '9'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2469': // ⑩ [CIRCLED NUMBER TEN]
fallthrough
case '\u24FE': // ⓾ [DOUBLE CIRCLED NUMBER TEN]
fallthrough
case '\u277F': // ❿ [DINGBAT NEGATIVE CIRCLED NUMBER TEN]
fallthrough
case '\u2789': // ➉ [DINGBAT CIRCLED SANS-SERIF NUMBER TEN]
fallthrough
case '\u2793': // ➓ [DINGBAT NEGATIVE CIRCLED SANS-SERIF NUMBER TEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '0'
outputPos++
case '\u2491': // ⒑ [NUMBER TEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '0'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u247D': // ⑽ [PARENTHESIZED NUMBER TEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '0'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u246A': // ⑪ [CIRCLED NUMBER ELEVEN]
fallthrough
case '\u24EB': // ⓫ [NEGATIVE CIRCLED NUMBER ELEVEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '1'
outputPos++
case '\u2492': // ⒒ [NUMBER ELEVEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u247E': // ⑾ [PARENTHESIZED NUMBER ELEVEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u246B': // ⑫ [CIRCLED NUMBER TWELVE]
fallthrough
case '\u24EC': // ⓬ [NEGATIVE CIRCLED NUMBER TWELVE]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '2'
outputPos++
case '\u2493': // ⒓ [NUMBER TWELVE FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '2'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u247F': // ⑿ [PARENTHESIZED NUMBER TWELVE]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '2'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u246C': // ⑬ [CIRCLED NUMBER THIRTEEN]
fallthrough
case '\u24ED': // ⓭ [NEGATIVE CIRCLED NUMBER THIRTEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '3'
outputPos++
case '\u2494': // ⒔ [NUMBER THIRTEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '3'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2480': // ⒀ [PARENTHESIZED NUMBER THIRTEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '3'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u246D': // ⑭ [CIRCLED NUMBER FOURTEEN]
fallthrough
case '\u24EE': // ⓮ [NEGATIVE CIRCLED NUMBER FOURTEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '4'
outputPos++
case '\u2495': // ⒕ [NUMBER FOURTEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '4'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2481': // ⒁ [PARENTHESIZED NUMBER FOURTEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '4'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u246E': // ⑮ [CIRCLED NUMBER FIFTEEN]
fallthrough
case '\u24EF': // ⓯ [NEGATIVE CIRCLED NUMBER FIFTEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '5'
outputPos++
case '\u2496': // ⒖ [NUMBER FIFTEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '5'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2482': // ⒂ [PARENTHESIZED NUMBER FIFTEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '5'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u246F': // ⑯ [CIRCLED NUMBER SIXTEEN]
fallthrough
case '\u24F0': // ⓰ [NEGATIVE CIRCLED NUMBER SIXTEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '6'
outputPos++
case '\u2497': // ⒗ [NUMBER SIXTEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '6'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2483': // ⒃ [PARENTHESIZED NUMBER SIXTEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '6'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2470': // ⑰ [CIRCLED NUMBER SEVENTEEN]
fallthrough
case '\u24F1': // ⓱ [NEGATIVE CIRCLED NUMBER SEVENTEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '7'
outputPos++
case '\u2498': // ⒘ [NUMBER SEVENTEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '7'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2484': // ⒄ [PARENTHESIZED NUMBER SEVENTEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '7'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2471': // ⑱ [CIRCLED NUMBER EIGHTEEN]
fallthrough
case '\u24F2': // ⓲ [NEGATIVE CIRCLED NUMBER EIGHTEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '8'
outputPos++
case '\u2499': // ⒙ [NUMBER EIGHTEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '8'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2485': // ⒅ [PARENTHESIZED NUMBER EIGHTEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '8'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2472': // ⑲ [CIRCLED NUMBER NINETEEN]
fallthrough
case '\u24F3': // ⓳ [NEGATIVE CIRCLED NUMBER NINETEEN]
output = output[:(len(output) + 1)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '9'
outputPos++
case '\u249A': // ⒚ [NUMBER NINETEEN FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '1'
outputPos++
output[outputPos] = '9'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2486': // ⒆ [PARENTHESIZED NUMBER NINETEEN]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '1'
outputPos++
output[outputPos] = '9'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u2473': // ⑳ [CIRCLED NUMBER TWENTY]
fallthrough
case '\u24F4': // ⓴ [NEGATIVE CIRCLED NUMBER TWENTY]
output = output[:(len(output) + 1)]
output[outputPos] = '2'
outputPos++
output[outputPos] = '0'
outputPos++
case '\u249B': // ⒛ [NUMBER TWENTY FULL STOP]
output = output[:(len(output) + 2)]
output[outputPos] = '2'
outputPos++
output[outputPos] = '0'
outputPos++
output[outputPos] = '.'
outputPos++
case '\u2487': // ⒇ [PARENTHESIZED NUMBER TWENTY]
output = output[:(len(output) + 3)]
output[outputPos] = '('
outputPos++
output[outputPos] = '2'
outputPos++
output[outputPos] = '0'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u00AB': // « [LEFT-POINTING DOUBLE ANGLE QUOTATION MARK]
fallthrough
case '\u00BB': // » [RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK]
fallthrough
case '\u201C': // “ [LEFT DOUBLE QUOTATION MARK]
fallthrough
case '\u201D': // ” [RIGHT DOUBLE QUOTATION MARK]
fallthrough
case '\u201E': // „ [DOUBLE LOW-9 QUOTATION MARK]
fallthrough
case '\u2033': // ″ [DOUBLE PRIME]
fallthrough
case '\u2036': // ‶ [REVERSED DOUBLE PRIME]
fallthrough
case '\u275D': // ❝ [HEAVY DOUBLE TURNED COMMA QUOTATION MARK ORNAMENT]
fallthrough
case '\u275E': // ❞ [HEAVY DOUBLE COMMA QUOTATION MARK ORNAMENT]
fallthrough
case '\u276E': // ❮ [HEAVY LEFT-POINTING ANGLE QUOTATION MARK ORNAMENT]
fallthrough
case '\u276F': // ❯ [HEAVY RIGHT-POINTING ANGLE QUOTATION MARK ORNAMENT]
fallthrough
case '\uFF02': // " [FULLWIDTH QUOTATION MARK]
output[outputPos] = '"'
outputPos++
case '\u2018': // ‘ [LEFT SINGLE QUOTATION MARK]
fallthrough
case '\u2019': // ’ [RIGHT SINGLE QUOTATION MARK]
fallthrough
case '\u201A': // ‚ [SINGLE LOW-9 QUOTATION MARK]
fallthrough
case '\u201B': // ‛ [SINGLE HIGH-REVERSED-9 QUOTATION MARK]
fallthrough
case '\u2032': // ′ [PRIME]
fallthrough
case '\u2035': // ‵ [REVERSED PRIME]
fallthrough
case '\u2039': // ‹ [SINGLE LEFT-POINTING ANGLE QUOTATION MARK]
fallthrough
case '\u203A': // › [SINGLE RIGHT-POINTING ANGLE QUOTATION MARK]
fallthrough
case '\u275B': // ❛ [HEAVY SINGLE TURNED COMMA QUOTATION MARK ORNAMENT]
fallthrough
case '\u275C': // ❜ [HEAVY SINGLE COMMA QUOTATION MARK ORNAMENT]
fallthrough
case '\uFF07': // ' [FULLWIDTH APOSTROPHE]
output[outputPos] = '\''
outputPos++
case '\u2010': // ‐ [HYPHEN]
fallthrough
case '\u2011': // ‑ [NON-BREAKING HYPHEN]
fallthrough
case '\u2012': // ‒ [FIGURE DASH]
fallthrough
case '\u2013': // – [EN DASH]
fallthrough
case '\u2014': // — [EM DASH]
fallthrough
case '\u207B': // ⁻ [SUPERSCRIPT MINUS]
fallthrough
case '\u208B': // ₋ [SUBSCRIPT MINUS]
fallthrough
case '\uFF0D': // - [FULLWIDTH HYPHEN-MINUS]
output[outputPos] = '-'
outputPos++
case '\u2045': // ⁅ [LEFT SQUARE BRACKET WITH QUILL]
fallthrough
case '\u2772': // ❲ [LIGHT LEFT TORTOISE SHELL BRACKET ORNAMENT]
fallthrough
case '\uFF3B': // [ [FULLWIDTH LEFT SQUARE BRACKET]
output[outputPos] = '['
outputPos++
case '\u2046': // ⁆ [RIGHT SQUARE BRACKET WITH QUILL]
fallthrough
case '\u2773': // ❳ [LIGHT RIGHT TORTOISE SHELL BRACKET ORNAMENT]
fallthrough
case '\uFF3D': // ] [FULLWIDTH RIGHT SQUARE BRACKET]
output[outputPos] = ']'
outputPos++
case '\u207D': // ⁽ [SUPERSCRIPT LEFT PARENTHESIS]
fallthrough
case '\u208D': // ₍ [SUBSCRIPT LEFT PARENTHESIS]
fallthrough
case '\u2768': // ❨ [MEDIUM LEFT PARENTHESIS ORNAMENT]
fallthrough
case '\u276A': // ❪ [MEDIUM FLATTENED LEFT PARENTHESIS ORNAMENT]
fallthrough
case '\uFF08': // ( [FULLWIDTH LEFT PARENTHESIS]
output[outputPos] = '('
outputPos++
case '\u2E28': // ⸨ [LEFT DOUBLE PARENTHESIS]
output = output[:(len(output) + 1)]
output[outputPos] = '('
outputPos++
output[outputPos] = '('
outputPos++
case '\u207E': // ⁾ [SUPERSCRIPT RIGHT PARENTHESIS]
fallthrough
case '\u208E': // ₎ [SUBSCRIPT RIGHT PARENTHESIS]
fallthrough
case '\u2769': // ❩ [MEDIUM RIGHT PARENTHESIS ORNAMENT]
fallthrough
case '\u276B': // ❫ [MEDIUM FLATTENED RIGHT PARENTHESIS ORNAMENT]
fallthrough
case '\uFF09': // ) [FULLWIDTH RIGHT PARENTHESIS]
output[outputPos] = ')'
outputPos++
case '\u2E29': // ⸩ [RIGHT DOUBLE PARENTHESIS]
output = output[:(len(output) + 1)]
output[outputPos] = ')'
outputPos++
output[outputPos] = ')'
outputPos++
case '\u276C': // ❬ [MEDIUM LEFT-POINTING ANGLE BRACKET ORNAMENT]
fallthrough
case '\u2770': // ❰ [HEAVY LEFT-POINTING ANGLE BRACKET ORNAMENT]
fallthrough
case '\uFF1C': // < [FULLWIDTH LESS-THAN SIGN]
output[outputPos] = '<'
outputPos++
case '\u276D': // ❭ [MEDIUM RIGHT-POINTING ANGLE BRACKET ORNAMENT]
fallthrough
case '\u2771': // ❱ [HEAVY RIGHT-POINTING ANGLE BRACKET ORNAMENT]
fallthrough
case '\uFF1E': // > [FULLWIDTH GREATER-THAN SIGN]
output[outputPos] = '>'
outputPos++
case '\u2774': // ❴ [MEDIUM LEFT CURLY BRACKET ORNAMENT]
fallthrough
case '\uFF5B': // { [FULLWIDTH LEFT CURLY BRACKET]
output[outputPos] = '{'
outputPos++
case '\u2775': // ❵ [MEDIUM RIGHT CURLY BRACKET ORNAMENT]
fallthrough
case '\uFF5D': // } [FULLWIDTH RIGHT CURLY BRACKET]
output[outputPos] = '}'
outputPos++
case '\u207A': // ⁺ [SUPERSCRIPT PLUS SIGN]
fallthrough
case '\u208A': // ₊ [SUBSCRIPT PLUS SIGN]
fallthrough
case '\uFF0B': // + [FULLWIDTH PLUS SIGN]
output[outputPos] = '+'
outputPos++
case '\u207C': // ⁼ [SUPERSCRIPT EQUALS SIGN]
fallthrough
case '\u208C': // ₌ [SUBSCRIPT EQUALS SIGN]
fallthrough
case '\uFF1D': // = [FULLWIDTH EQUALS SIGN]
output[outputPos] = '='
outputPos++
case '\uFF01': // ! [FULLWIDTH EXCLAMATION MARK]
output[outputPos] = '!'
outputPos++
case '\u203C': // ‼ [DOUBLE EXCLAMATION MARK]
output = output[:(len(output) + 1)]
output[outputPos] = '!'
outputPos++
output[outputPos] = '!'
outputPos++
case '\u2049': // ⁉ [EXCLAMATION QUESTION MARK]
output = output[:(len(output) + 1)]
output[outputPos] = '!'
outputPos++
output[outputPos] = '?'
outputPos++
case '\uFF03': // # [FULLWIDTH NUMBER SIGN]
output[outputPos] = '#'
outputPos++
case '\uFF04': // $ [FULLWIDTH DOLLAR SIGN]
output[outputPos] = '$'
outputPos++
case '\u2052': // ⁒ [COMMERCIAL MINUS SIGN]
fallthrough
case '\uFF05': // % [FULLWIDTH PERCENT SIGN]
output[outputPos] = '%'
outputPos++
case '\uFF06': // & [FULLWIDTH AMPERSAND]
output[outputPos] = '&'
outputPos++
case '\u204E': // ⁎ [LOW ASTERISK]
fallthrough
case '\uFF0A': // * [FULLWIDTH ASTERISK]
output[outputPos] = '*'
outputPos++
case '\uFF0C': // , [FULLWIDTH COMMA]
output[outputPos] = ','
outputPos++
case '\uFF0E': // . [FULLWIDTH FULL STOP]
output[outputPos] = '.'
outputPos++
case '\u2044': // ⁄ [FRACTION SLASH]
fallthrough
case '\uFF0F': // / [FULLWIDTH SOLIDUS]
output[outputPos] = '/'
outputPos++
case '\uFF1A': // : [FULLWIDTH COLON]
output[outputPos] = ':'
outputPos++
case '\u204F': // ⁏ [REVERSED SEMICOLON]
fallthrough
case '\uFF1B': // ; [FULLWIDTH SEMICOLON]
output[outputPos] = ';'
outputPos++
case '\uFF1F': // ? [FULLWIDTH QUESTION MARK]
output[outputPos] = '?'
outputPos++
case '\u2047': // ⁇ [DOUBLE QUESTION MARK]
output = output[:(len(output) + 1)]
output[outputPos] = '?'
outputPos++
output[outputPos] = '?'
outputPos++
case '\u2048': // ⁈ [QUESTION EXCLAMATION MARK]
output = output[:(len(output) + 1)]
output[outputPos] = '?'
outputPos++
output[outputPos] = '!'
outputPos++
case '\uFF20': // @ [FULLWIDTH COMMERCIAL AT]
output[outputPos] = '@'
outputPos++
case '\uFF3C': // \ [FULLWIDTH REVERSE SOLIDUS]
output[outputPos] = '\\'
outputPos++
case '\u2038': // ‸ [CARET]
fallthrough
case '\uFF3E': // ^ [FULLWIDTH CIRCUMFLEX ACCENT]
output[outputPos] = '^'
outputPos++
case '\uFF3F': // _ [FULLWIDTH LOW LINE]
output[outputPos] = '_'
outputPos++
case '\u2053': // ⁓ [SWUNG DASH]
fallthrough
case '\uFF5E': // ~ [FULLWIDTH TILDE]
output[outputPos] = '~'
outputPos++
default:
output[outputPos] = c
outputPos++
}
}
}
return output
}
================================================
FILE: analysis/char/asciifolding/asciifolding_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package asciifolding
import (
"fmt"
"reflect"
"testing"
)
// TestAsciiFoldingFilter runs the ASCII folding char filter over a table of
// inputs and checks the exact bytes it returns.
//
// Inputs that target a specific non-ASCII code point are written with \u
// escapes so that the intended rune survives editors and tooling that
// normalize fullwidth/compatibility characters to plain ASCII (which would
// silently turn the case into a no-op).
func TestAsciiFoldingFilter(t *testing.T) {
	tests := []struct {
		input  []byte
		output []byte
	}{
		{
			// empty input passes
			input:  []byte(``),
			output: []byte(``),
		},
		{
			// no modification for plain ASCII
			input:  []byte(`The quick brown fox jumps over the lazy dog`),
			output: []byte(`The quick brown fox jumps over the lazy dog`),
		},
		{
			// Umlauts are folded to plain ASCII
			input:  []byte(`The quick bröwn fox jümps over the läzy dog`),
			output: []byte(`The quick brown fox jumps over the lazy dog`),
		},
		{
			// composite unicode runes are folded to more than one ASCII rune
			input:  []byte(`ÆꜴ`),
			output: []byte(`AEAO`),
		},
		{
			// apples from https://issues.couchbase.com/browse/MB-33486
			input:  []byte(`Ápple Àpple Äpple Âpple Ãpple Åpple`),
			output: []byte(`Apple Apple Apple Apple Apple Apple`),
		},
		{
			// Fix ASCII folding of \u24A2
			input:  []byte(`⒢`),
			output: []byte(`(g)`),
		},
		{
			// Test folding of \u2053 (SWUNG DASH)
			input:  []byte("a\u2053b"),
			output: []byte(`a~b`),
		},
		{
			// Test folding of \uFF5E (FULLWIDTH TILDE)
			input:  []byte("c\uFF5Ed"),
			output: []byte(`c~d`),
		},
		{
			// Test folding of \uFF3F (FULLWIDTH LOW LINE) - case before tilde
			input:  []byte("e\uFF3Ff"),
			output: []byte(`e_f`),
		},
		{
			// Test mix including both tilde variants and the default
			// pass-through branch (✅ is not explicitly folded)
			input:  []byte("a\u2053b✅c\uFF5Ed"),
			output: []byte(`a~b✅c~d`),
		},
		{
			// Test start of 'A' fallthrough block
			input:  []byte(`ÀBC`),
			output: []byte(`ABC`),
		},
		{
			// Test end of 'A' fallthrough block
			input:  []byte(`DEFẶ`),
			output: []byte(`DEFA`),
		},
		{
			// Test start of 'AE' fallthrough block
			input:  []byte(`Æ`),
			output: []byte(`AE`),
		},
		{
			// Test end of 'AE' fallthrough block
			input:  []byte(`ᴁ`),
			output: []byte(`AE`),
		},
		{
			// Test 'DZ' multi-rune output
			input:  []byte(`DŽebra`),
			output: []byte(`DZebra`),
		},
		{
			// Test start of 'a' fallthrough block
			input:  []byte(`àbc`),
			output: []byte(`abc`),
		},
		{
			// Test end of 'a' fallthrough block
			// NOTE(review): this input is plain ASCII, so it does not reach
			// the fold table; presumably a fullwidth 'a' (U+FF41) was
			// intended - confirm against the 'a' case list before changing.
			input:  []byte(`defa`),
			output: []byte(`defa`),
		},
	}

	for _, test := range tests {
		filter := New()
		t.Run(fmt.Sprintf("on %s", test.input), func(t *testing.T) {
			output := filter.Filter(test.input)
			if !reflect.DeepEqual(output, test.output) {
				t.Errorf("\nExpected:\n`%s`\ngot:\n`%s`\n", string(test.output), string(output))
			}
		})
	}
}
================================================
FILE: analysis/char/html/html.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"bytes"
"regexp"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this char filter is registered in init.
const Name = "html"
// htmlCharFilterRegexp matches one markup tag: an opening "<", an optional
// "/", a tag name made of word characters or "!", optional attributes
// (bare, or with a double-quoted, single-quoted or unquoted value), an
// optional self-closing "/", and the closing ">".
//
// The pattern must begin with "</?"; a leading bare "?" is a repetition
// operator without an operand, which regexp.MustCompile rejects with a panic
// at package initialization.
var htmlCharFilterRegexp = regexp.MustCompile(`</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`)
// CharFilter rewrites the parts of its input that match a regular
// expression; see Filter for the exact replacement rule.
type CharFilter struct {
// r is the pattern whose matches are rewritten.
r *regexp.Regexp
// replacement is the byte sequence that Filter repeats over each match.
replacement []byte
}
// New returns a CharFilter that uses the package-level tag pattern and a
// single space as the replacement.
func New() *CharFilter {
	filter := new(CharFilter)
	filter.r = htmlCharFilterRegexp
	filter.replacement = []byte(" ")
	return filter
}
// Filter returns a copy of input in which every match of the filter's
// pattern is overwritten with the replacement repeated once per matched
// byte. With the one-byte replacement installed by New, the result has the
// same length as the input.
func (s *CharFilter) Filter(input []byte) []byte {
	blankOut := func(match []byte) []byte {
		return bytes.Repeat(s.replacement, len(match))
	}
	return s.r.ReplaceAllFunc(input, blankOut)
}
// CharFilterConstructor is the registry constructor for this char filter.
// It does not read config or cache; it always returns the filter built by
// New together with a nil error.
func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
return New(), nil
}
// init registers CharFilterConstructor under Name and panics if the
// registration reports an error.
func init() {
	if err := registry.RegisterCharFilter(Name, CharFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/char/regexp/regexp.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp
import (
"fmt"
"regexp"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this char filter is registered in init.
const Name = "regexp"
// CharFilter replaces every match of a regular expression in its input
// with a fixed replacement; see Filter.
type CharFilter struct {
// r is the pattern whose matches are replaced.
r *regexp.Regexp
// replacement is handed to (*regexp.Regexp).ReplaceAll as the replacement text.
replacement []byte
}
// New returns a CharFilter that replaces matches of r with replacement.
// Both arguments are stored as given; replacement is not copied.
func New(r *regexp.Regexp, replacement []byte) *CharFilter {
	filter := new(CharFilter)
	filter.r = r
	filter.replacement = replacement
	return filter
}
// Filter returns a copy of input with every match of the filter's pattern
// replaced by the configured replacement. The work is delegated to
// (*regexp.Regexp).ReplaceAll, so "$1"-style references in the replacement
// are expanded to the corresponding submatch.
func (s *CharFilter) Filter(input []byte) []byte {
return s.r.ReplaceAll(input, s.replacement)
}
// CharFilterConstructor is the registry constructor for the regexp char
// filter.
//
// config keys read:
//   - "regexp" (string, required): the pattern to compile.
//   - "replace" (string, optional): the replacement text; a single space is
//     used when the key is absent or not a string.
//
// It returns an error when "regexp" is missing or not a string, or when the
// pattern does not compile. The compile error is wrapped with %w so callers
// can inspect it with errors.As / errors.Is; the message text is the same
// as with %v. cache is not used.
func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	regexpStr, ok := config["regexp"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify regexp")
	}

	r, err := regexp.Compile(regexpStr)
	if err != nil {
		return nil, fmt.Errorf("unable to build regexp char filter: %w", err)
	}

	replaceBytes := []byte(" ")
	if replaceStr, ok := config["replace"].(string); ok {
		replaceBytes = []byte(replaceStr)
	}
	return New(r, replaceBytes), nil
}
// init registers CharFilterConstructor under Name and panics if the
// registration reports an error.
func init() {
	if err := registry.RegisterCharFilter(Name, CharFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/char/regexp/regexp_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp
import (
"fmt"
"reflect"
"regexp"
"testing"
)
// TestRegexpCharFilter compiles each pattern, builds a filter with the given
// replacement and checks the exact bytes Filter returns.
//
// The first case uses a markup-tag pattern. It must start with "</?" (a
// leading bare "?" does not compile and makes regexp.MustCompile panic), and
// its input must actually contain tags for the expected output to carry the
// surrounding spaces.
func TestRegexpCharFilter(t *testing.T) {
	tests := []struct {
		regexStr string
		replace  []byte
		input    []byte
		output   []byte
	}{
		{
			regexStr: `</?[!\w]+((\s+\w+(\s*=\s*(?:".*?"|'.*?'|[^'">\s]+))?)+\s*|\s*)/?>`,
			replace:  []byte{' '},
			input:    []byte(`<html>test</html>`),
			output:   []byte(` test `),
		},
		{
			regexStr: `\x{200C}`,
			replace:  []byte{' '},
			input:    []byte("water\u200Cunder\u200Cthe\u200Cbridge"),
			output:   []byte("water under the bridge"),
		},
		{
			// submatch references in the replacement are expanded
			regexStr: `([a-z])\s+(\d)`,
			replace:  []byte(`$1-$2`),
			input:    []byte(`temp 1`),
			output:   []byte(`temp-1`),
		},
		{
			regexStr: `foo.?`,
			replace:  []byte(`X`),
			input:    []byte(`seafood, fool`),
			output:   []byte(`seaX, X`),
		},
		{
			// replacement shorter than the match
			regexStr: `def`,
			replace:  []byte(`_`),
			input:    []byte(`abcdefghi`),
			output:   []byte(`abc_ghi`),
		},
		{
			// replacement longer than the match
			regexStr: `456`,
			replace:  []byte(`000000`),
			input:    []byte(`123456789`),
			output:   []byte(`123000000789`),
		},
		{
			regexStr: `“|”`,
			replace:  []byte(`"`),
			input:    []byte(`“hello”`),
			output:   []byte(`"hello"`),
		},
	}

	for _, test := range tests {
		t.Run(fmt.Sprintf("match %s replace %s", test.regexStr, string(test.replace)), func(t *testing.T) {
			regex := regexp.MustCompile(test.regexStr)
			filter := New(regex, test.replace)

			output := filter.Filter(test.input)
			if !reflect.DeepEqual(test.output, output) {
				t.Errorf("Expected: `%s`, Got: `%s`\n", string(test.output), string(output))
			}
		})
	}
}
================================================
FILE: analysis/char/zerowidthnonjoiner/zerowidthnonjoiner.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package zerowidthnonjoiner
import (
"regexp"
"github.com/blevesearch/bleve/v2/analysis"
regexpCharFilter "github.com/blevesearch/bleve/v2/analysis/char/regexp"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this char filter is registered in init.
const Name = "zero_width_spaces"
// zeroWidthNonJoinerRegexp matches the single code point U+200C.
var zeroWidthNonJoinerRegexp = regexp.MustCompile(`\x{200C}`)
// CharFilterConstructor is the registry constructor for this char filter.
// It returns a regexp char filter that replaces each U+200C match with a
// single space. config and cache are not used.
func CharFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.CharFilter, error) {
	return regexpCharFilter.New(zeroWidthNonJoinerRegexp, []byte(" ")), nil
}
// init registers CharFilterConstructor under Name and panics if the
// registration reports an error.
func init() {
	if err := registry.RegisterCharFilter(Name, CharFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/flexible/flexible.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package flexible
import (
"fmt"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this date time parser is registered in init.
const Name = "flexiblego"
// DateTimeParser parses date/time strings by trying a list of Go time
// layouts in order; see ParseDateTime.
type DateTimeParser struct {
// layouts are the Go time layouts passed to time.Parse, in the order tried.
layouts []string
}
// New returns a DateTimeParser that tries the given layouts in order.
// The slice is stored as given, not copied.
func New(layouts []string) *DateTimeParser {
	parser := new(DateTimeParser)
	parser.layouts = layouts
	return parser
}
// ParseDateTime tries each configured layout in order with time.Parse and
// returns the parsed time together with the first layout that succeeded.
// When no layout parses input it returns the zero time, an empty layout and
// analysis.ErrInvalidDateTime.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	for i := range p.layouts {
		candidate := p.layouts[i]
		parsed, err := time.Parse(candidate, input)
		if err != nil {
			// this layout does not fit; try the next one
			continue
		}
		return parsed, candidate, nil
	}
	return time.Time{}, "", analysis.ErrInvalidDateTime
}
// DateTimeParserConstructor is the registry constructor for the flexible
// date time parser. It reads config["layouts"], which must be a
// []interface{}; otherwise it returns an error. Entries that are strings are
// used as Go time layouts in the order given; entries of any other type are
// skipped. cache is not used.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	rawLayouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}

	var layoutStrs []string
	for _, raw := range rawLayouts {
		if str, isString := raw.(string); isString {
			layoutStrs = append(layoutStrs, str)
		}
	}
	return New(layoutStrs), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registration reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/flexible/flexible_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package flexible
import (
"reflect"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestFlexibleDateTimeParser checks that a parser configured with several
// RFC 3339 style layouts returns the expected instant, the layout that
// matched, and the expected error for each input.
//
// Parsed times are compared with Time.Equal rather than reflect.DeepEqual:
// time.Parse returns a Time in the Local location when the parsed offset is
// one used by the machine's zone, so a structural comparison against a
// time.FixedZone value would fail on hosts whose local offset is -08:00 even
// though both values denote the same instant.
func TestFlexibleDateTimeParser(t *testing.T) {
	testLocation := time.FixedZone("", -8*60*60)

	rfc3339NoTimezone := "2006-01-02T15:04:05"
	rfc3339NoTimezoneNoT := "2006-01-02 15:04:05"
	rfc3339NoTime := "2006-01-02"

	dateOptionalTimeParser := New(
		[]string{
			time.RFC3339Nano,
			time.RFC3339,
			rfc3339NoTimezone,
			rfc3339NoTimezoneNoT,
			rfc3339NoTime,
		})

	tests := []struct {
		input          string
		expectedTime   time.Time
		expectedLayout string
		expectedError  error
	}{
		{
			input:          "2014-08-03",
			expectedTime:   time.Date(2014, 8, 3, 0, 0, 0, 0, time.UTC),
			expectedLayout: rfc3339NoTime,
			expectedError:  nil,
		},
		{
			input:          "2014-08-03T15:59:30",
			expectedTime:   time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
			expectedLayout: rfc3339NoTimezone,
			expectedError:  nil,
		},
		{
			input:          "2014-08-03 15:59:30",
			expectedTime:   time.Date(2014, 8, 3, 15, 59, 30, 0, time.UTC),
			expectedLayout: rfc3339NoTimezoneNoT,
			expectedError:  nil,
		},
		{
			// RFC3339Nano is listed first and also accepts input without
			// fractional seconds, so it is the layout reported here.
			input:          "2014-08-03T15:59:30-08:00",
			expectedTime:   time.Date(2014, 8, 3, 15, 59, 30, 0, testLocation),
			expectedLayout: time.RFC3339Nano,
			expectedError:  nil,
		},
		{
			input:          "2014-08-03T15:59:30.999999999-08:00",
			expectedTime:   time.Date(2014, 8, 3, 15, 59, 30, 999999999, testLocation),
			expectedLayout: time.RFC3339Nano,
			expectedError:  nil,
		},
		{
			input:          "not a date time",
			expectedTime:   time.Time{},
			expectedLayout: "",
			expectedError:  analysis.ErrInvalidDateTime,
		},
	}

	for _, test := range tests {
		t.Run(test.input, func(t *testing.T) {
			actualTime, actualLayout, actualErr := dateOptionalTimeParser.ParseDateTime(test.input)
			if actualErr != test.expectedError {
				t.Fatalf("expected error %#v, got %#v", test.expectedError, actualErr)
			}
			// compare instants, not the internal Location representation
			if !actualTime.Equal(test.expectedTime) {
				t.Errorf("expected time %v, got %v", test.expectedTime, actualTime)
			}
			if !reflect.DeepEqual(actualLayout, test.expectedLayout) {
				t.Errorf("expected layout %v, got %v", test.expectedLayout, actualLayout)
			}
		})
	}
}
================================================
FILE: analysis/datetime/iso/iso.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iso
import (
"fmt"
"strings"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this date time parser is registered in init.
const Name = "isostyle"
// textLiteralDelimiter encloses literal text inside an ISO style format
// string; parseISOString treats two consecutive delimiters as one literal
// delimiter character.
var textLiteralDelimiter byte = '\'' // single quote
// ISO style date strings are represented in
// https://docs.oracle.com/javase/8/docs/api/java/time/format/DateTimeFormatter.html
//
// Some format specifiers are not specified in go time package, such as:
// - 'V' for timezone name, like 'Europe/Berlin' or 'America/New_York'.
// - 'Q' for quarter of year, like Q3 or 3rd Quarter.
// - 'zzzz' for full name of timezone like "Japan Standard Time" or "Eastern Standard Time".
// - 'O' for localized zone-offset, like GMT+8 or GMT+08:00.
// - '[]' for optional section of the format.
// - 'G' for era, like AD or BC.
// - 'W' for week of month.
// - 'D' for day of year.
// So date strings with these date elements cannot be parsed.
// Note that parseISOString does not reject 'zzzz': it converts any run of one
// to four 'z' to the abbreviation layout "MST", so a full timezone name in
// the input is still not parsed.
//
// timeElementToLayout maps a format letter and the number of times it is
// repeated to the equivalent Go layout fragment. Letters that are not in
// this table (or repeat counts that are not listed) are handled, or
// rejected, by the switch in parseISOString.
var timeElementToLayout = map[byte]map[int]string{
'M': {
4: "January", // MMMM = full month name
3: "Jan", // MMM = short month name
2: "01", // MM = month of year (2 digits) (01-12)
1: "1", // M = month of year (1 digit) (1-12)
},
'd': {
2: "02", // dd = day of month (2 digits) (01-31)
1: "2", // d = day of month (1 digit) (1-31)
},
'a': {
2: "pm", // aa = pm/am
1: "PM", // a = PM/AM
},
// both repeat counts of 'H' map to the same Go fragment, "15"
'H': {
2: "15", // HH = hour (24 hour clock) (2 digits)
1: "15", // H = hour (24 hour clock) (1 digit)
},
'm': {
2: "04", // mm = minute (2 digits)
1: "4", // m = minute (1 digit)
},
's': {
2: "05", // ss = seconds (2 digits)
1: "5", // s = seconds (1 digit)
},
// timezone offsets from UTC below
'X': {
5: "Z07:00:00", // XXXXX = timezone offset (+-hh:mm:ss)
4: "Z070000", // XXXX = timezone offset (+-hhmmss)
3: "Z07:00", // XXX = timezone offset (+-hh:mm)
2: "Z0700", // XX = timezone offset (+-hhmm)
1: "Z07", // X = timezone offset (+-hh)
},
'x': {
5: "-07:00:00", // xxxxx = timezone offset (+-hh:mm:ss)
4: "-070000", // xxxx = timezone offset (+-hhmmss)
3: "-07:00", // xxx = timezone offset (+-hh:mm)
2: "-0700", // xx = timezone offset (+-hhmm)
1: "-07", // x = timezone offset (+-hh)
},
}
// DateTimeParser parses date/time strings by trying a list of Go time
// layouts in order; see ParseDateTime.
type DateTimeParser struct {
// layouts are the Go time layouts passed to time.Parse, in the order tried.
layouts []string
}
// New returns a DateTimeParser that tries the given Go time layouts in
// order. The slice is stored as given, not copied.
func New(layouts []string) *DateTimeParser {
	parser := new(DateTimeParser)
	parser.layouts = layouts
	return parser
}
// ParseDateTime tries each configured layout in order with time.Parse and
// returns the parsed time together with the first layout that succeeded.
// When no layout parses input it returns the zero time, an empty layout and
// analysis.ErrInvalidDateTime.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	for i := range p.layouts {
		candidate := p.layouts[i]
		parsed, err := time.Parse(candidate, input)
		if err != nil {
			// this layout does not fit; try the next one
			continue
		}
		return parsed, candidate, nil
	}
	return time.Time{}, "", analysis.ErrInvalidDateTime
}
// letterCounter returns the length of the run of identical bytes in layout
// that starts at idx, counting the byte at idx itself. The result is at
// least 1.
func letterCounter(layout string, idx int) int {
	end := idx + 1
	for end < len(layout) && layout[end] == layout[idx] {
		end++
	}
	return end - idx
}
// invalidFormatError builds the error reported for an unsupported format
// specifier. The specifier shown in the message is character repeated count
// times.
func invalidFormatError(character byte, count int) error {
	specifier := strings.Repeat(string(character), count)
	return fmt.Errorf("invalid format string, unknown format specifier: %s", specifier)
}
// parseISOString converts an ISO style (java.time.format.DateTimeFormatter
// like) format string into the equivalent Go time layout.
//
// The input is scanned byte by byte:
//   - two consecutive single quotes produce one literal single quote;
//   - text between a pair of single quotes is copied verbatim, and a missing
//     closing quote is an error;
//   - a run of one ASCII letter is a format specifier, resolved first through
//     timeElementToLayout and otherwise by the per-letter rules below; an
//     unknown letter or unsupported repeat count is an error;
//   - every other byte is copied unchanged.
//
// On error the returned layout is empty.
func parseISOString(layout string) (string, error) {
	var out strings.Builder
	total := len(layout)

	for pos := 0; pos < total; {
		ch := layout[pos]

		if ch == textLiteralDelimiter {
			// a doubled delimiter stands for one literal delimiter
			if pos+1 < total && layout[pos+1] == textLiteralDelimiter {
				out.WriteByte(textLiteralDelimiter)
				pos += 2
				continue
			}
			// quoted literal: copy everything up to the closing delimiter
			pos++
			for pos < total && layout[pos] != textLiteralDelimiter {
				out.WriteByte(layout[pos])
				pos++
			}
			if pos == total {
				// ran off the end without finding the closing delimiter
				return "", fmt.Errorf("invalid format string, expected text literal delimiter: %s", string(textLiteralDelimiter))
			}
			// step over the closing delimiter
			pos++
			continue
		}

		// ASCII letters are reserved for format specifiers; anything else
		// is copied as is
		isLetter := ('a' <= ch && ch <= 'z') || ('A' <= ch && ch <= 'Z')
		if !isLetter {
			out.WriteByte(ch)
			pos++
			continue
		}

		// number of consecutive occurrences of this letter
		count := letterCounter(layout, pos)

		// fixed translations come from the table
		if fragment, found := timeElementToLayout[ch][count]; found {
			out.WriteString(fragment)
			pos += count
			continue
		}

		switch ch {
		case 'y', 'u', 'Y':
			// year: exactly two letters is the two-digit year, any other
			// count is "2006" left-padded with zeros to that width
			if count == 2 {
				out.WriteString("06")
			} else {
				fmt.Fprintf(&out, "%0*s", count, "2006")
			}
		case 'h', 'K':
			// hour (1-12)
			switch count {
			case 1:
				out.WriteString("3") // h, K
			case 2:
				out.WriteString("03") // hh, KK
			default:
				return "", invalidFormatError(ch, count) // e.g. hhh
			}
		case 'E':
			// day of week
			switch {
			case count == 4:
				out.WriteString("Monday") // EEEE
			case count <= 3:
				out.WriteString("Mon") // E, EE, EEE
			default:
				return "", invalidFormatError(ch, count) // e.g. EEEEE
			}
		case 'S':
			// fraction of second: one '0' per letter, at most nine
			// (SSS = millisecond, SSSSSS = microsecond, SSSSSSSSS = nanosecond)
			if count > 9 {
				return "", invalidFormatError(ch, count)
			}
			out.WriteString(strings.Repeat("0", count))
		case 'z':
			// timezone id: one to four letters all map to the abbreviation
			if count >= 5 {
				return "", invalidFormatError(ch, count)
			}
			out.WriteString("MST")
		default:
			return "", invalidFormatError(ch, count)
		}
		pos += count
	}

	return out.String(), nil
}
// DateTimeParserConstructor is the registry constructor for the ISO style
// date time parser. It reads config["layouts"], which must be a
// []interface{}; otherwise it returns an error. Each string entry is
// converted with parseISOString, and the first conversion error aborts
// construction. Entries that are not strings are skipped. cache is not used.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	rawLayouts, ok := config["layouts"].([]interface{})
	if !ok {
		return nil, fmt.Errorf("must specify layouts")
	}

	var layoutStrs []string
	for _, raw := range rawLayouts {
		isoLayout, isString := raw.(string)
		if !isString {
			continue
		}
		converted, err := parseISOString(isoLayout)
		if err != nil {
			return nil, err
		}
		layoutStrs = append(layoutStrs, converted)
	}
	return New(layoutStrs), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registration reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/iso/iso_test.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package iso
import (
"fmt"
"testing"
)
// TestConversionFromISOStyle feeds ISO style format strings to
// parseISOString and checks either the resulting Go layout or the exact
// error message. Table entries are {input, expected output, expected error}.
func TestConversionFromISOStyle(t *testing.T) {
	const (
		unknownSpecifier = "invalid format string, unknown format specifier: %s"
		missingDelimiter = "invalid format string, expected text literal delimiter: %s"
	)

	tests := []struct {
		input  string
		output string
		err    error
	}{
		{"yyyy-MM-dd", "2006-01-02", nil},
		{"uuu/M''''dd'T'HH:m:ss.SSS", "2006/1''02T15:4:05.000", nil},
		{"YYYY-MM-dd'T'H:mm:ss zzz", "2006-01-02T15:04:05 MST", nil},
		{"MMMM dd yyyy', 'HH:mm:ss.SSS", "January 02 2006, 15:04:05.000", nil},
		{"h 'o'''' clock' a, XXX", "3 o' clock PM, Z07:00", nil},
		{"YYYY-MM-dd'T'HH:mm:ss'Z'", "2006-01-02T15:04:05Z", nil},
		{"E MMM d H:mm:ss z Y", "Mon Jan 2 15:04:05 MST 2006", nil},
		{"E MMM DD H:m:s z Y", "", fmt.Errorf(unknownSpecifier, "DD")},
		{"E MMM''''' H:m:s z Y", "", fmt.Errorf(missingDelimiter, "'")},
		{"MMMMM dd yyyy', 'HH:mm:ss.SSS", "", fmt.Errorf(unknownSpecifier, "MMMMM")},
		{"yy", "06", nil},                                         // year (2 digits)
		{"yyyyy", "02006", nil},                                   // year (5 digits, padded)
		{"h", "3", nil},                                           // hour 1-12 (1 digit)
		{"hh", "03", nil},                                         // hour 1-12 (2 digits)
		{"KK", "03", nil},                                         // hour 1-12 (2 digits, alt)
		{"hhh", "", fmt.Errorf(unknownSpecifier, "hhh")},          // invalid hour count
		{"E", "Mon", nil},                                         // day of week (short)
		{"EEE", "Mon", nil},                                       // day of week (short)
		{"EEEE", "Monday", nil},                                   // day of week (long)
		{"EEEEE", "", fmt.Errorf(unknownSpecifier, "EEEEE")},      // invalid day of week count
		{"S", "0", nil},                                           // fraction of second (1 digit)
		{"SSSSSSSSS", "000000000", nil},                           // fraction of second (9 digits)
		{"SSSSSSSSSS", "", fmt.Errorf(unknownSpecifier, "SSSSSSSSSS")}, // invalid fraction of second count
		{"z", "MST", nil},                                         // timezone abbreviation
		{"zzz", "MST", nil},                                       // three letters still give the abbreviation
		{"zzzz", "MST", nil},                                      // four letters still give the abbreviation
		{"G", "", fmt.Errorf(unknownSpecifier, "G")},              // era designator (unsupported)
		{"W", "", fmt.Errorf(unknownSpecifier, "W")},              // week of month (unsupported)
	}

	for i, test := range tests {
		t.Run(fmt.Sprintf("test %d: %s", i, test.input), func(t *testing.T) {
			out, err := parseISOString(test.input)

			switch {
			case (err == nil) != (test.err == nil):
				// an error was expected but not returned, or the reverse
				t.Fatalf("expected error %v, got error %v", test.err, err)
			case err != nil && err.Error() != test.err.Error():
				// both are errors; their messages must match exactly
				t.Fatalf("expected error %v, got error %v", test.err, err)
			case err == nil && out != test.output:
				// the output is only checked on the success path
				t.Fatalf("expected output '%v', got '%v'", test.output, out)
			}
		})
	}
}
================================================
FILE: analysis/datetime/optional/optional.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package optional
import (
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which this date/time parser is registered.
const Name = "dateTimeOptional"

// RFC 3339 derived layouts in which the timezone, the "T" separator or the
// time-of-day portion is left out, plus a variant with a numeric "-0700"
// offset.
const (
	rfc3339NoTimezone    = "2006-01-02T15:04:05"
	rfc3339NoTimezoneNoT = "2006-01-02 15:04:05"
	rfc3339Offset        = "2006-01-02 15:04:05 -0700"
	rfc3339NoTime        = "2006-01-02"
)

// layouts is the ordered list of Go time layouts handed to flexible.New by
// DateTimeParserConstructor.
var layouts = []string{
	time.RFC3339Nano,
	time.RFC3339,
	rfc3339NoTimezone,
	rfc3339NoTimezoneNoT,
	rfc3339Offset,
	rfc3339NoTime,
}
// DateTimeParserConstructor returns the parser produced by flexible.New for
// the package-level layouts list. Neither config nor cache is consulted, and
// the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	parser := flexible.New(layouts)
	return parser, nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/percent/percent.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package percent
import (
"fmt"
"strings"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which the percent-style date/time parser is registered.
const Name = "percentstyle"
// formatDelimiter is the byte that introduces a format specifier in a
// percent-style format string.
var formatDelimiter byte = '%'
// formatSpecifierToLayout maps the single byte that follows the delimiter to
// the Go time layout fragment it is replaced with by parseFormatString.
// format specifiers as per strftime in the C standard library
// https://man7.org/linux/man-pages/man3/strftime.3.html
var formatSpecifierToLayout = map[byte]string{
formatDelimiter: string(formatDelimiter), // %% = % (literal %)
'a': "Mon", // %a = short weekday name
'A': "Monday", // %A = full weekday name
'd': "02", // %d = day of month (2 digits) (01-31)
'e': "2", // %e = day of month (1 digit) (1-31)
'b': "Jan", // %b = short month name
'B': "January", // %B = full month name
'm': "01", // %m = month of year (2 digits) (01-12)
'y': "06", // %y = year without century
'Y': "2006", // %Y = year with century
'H': "15", // %H = hour (24 hour clock) (2 digits)
'I': "03", // %I = hour (12 hour clock) (2 digits)
'l': "3", // %l = hour (12 hour clock) (1 digit)
'p': "PM", // %p = PM/AM
'P': "pm", // %P = pm/am (lowercase)
'M': "04", // %M = minute (2 digits)
'S': "05", // %S = seconds (2 digits)
'f': "999999", // .%f = fraction of seconds - up to microseconds (6 digits) - deci/milli/micro
'Z': "MST", // %Z = timezone name (GMT, JST, UTC etc)
// %z is not in this map; it is resolved through timezoneOptions by checkTZOptions
// some additional options not in strftime to support additional options such as
// disallow 0 padding in minute and seconds, nanosecond precision, etc
'o': "1", // %o = month of year (1 digit) (1-12)
'i': "4", // %i = minute (1 digit)
's': "5", // %s = seconds (1 digit)
'N': "999999999", // .%N = fraction of seconds - up to nanoseconds (9 digits) - milli/micro/nano
}
// timezoneOptions maps the %z specifier, together with its optional modifier
// suffix, to the Go layout fragment for a numeric timezone offset.
// The modifiers allow a colon in the offset and/or a seconds component.
// timezone offsets are from UTC
var timezoneOptions = map[string]string{
"z": "Z0700", // %z = timezone offset in +-hhmm / +-(2 digit hour)(2 digit minute) +0500, -0600 etc
"z:M": "Z07:00", // %z:M = timezone offset(+-hh:mm) / +-(2 digit hour):(2 digit minute) +05:00, -06:00 etc
"z:S": "Z07:00:00", // %z:S = timezone offset(+-hh:mm:ss) / +-(2 digit hour):(2 digit minute):(2 digit second) +05:20:00, -06:30:00 etc
"zH": "Z07", // %zH = timezone offset(+-hh) / +-(2 digit hour) +05, -06 etc
"zS": "Z070000", // %zS = timezone offset(+-hhmmss) / +-(2 digit hour)(2 digit minute)(2 digit second) +052000, -063000 etc
}
// DateTimeParser parses date/time strings against a fixed list of Go time
// layouts.
type DateTimeParser struct {
	// layouts holds the Go time layouts, in the order ParseDateTime tries them.
	layouts []string
}

// New returns a DateTimeParser that uses the given Go time layouts. The slice
// is stored as passed in; it is not copied.
func New(layouts []string) *DateTimeParser {
	parser := new(DateTimeParser)
	parser.layouts = layouts
	return parser
}
// checkTZOptions resolves a %z specifier and its optional modifier.
//
// idx must point at the '%' of a "%z" sequence inside formatString. The
// function inspects the characters following the 'z' and recognises the
// modifiers ":M", ":S", "H" and "S". It returns the matching Go layout
// fragment from timezoneOptions and the index of the first character after
// the consumed specifier.
//
// When no modifier matches (including "%z" or "%z:" at the end of the string)
// the plain "z" layout is returned together with idx+2, so whatever follows
// is left for the caller to process.
func checkTZOptions(formatString string, idx int) (string, int) {
	afterZ := idx + 2 // index of the character right after 'z'
	if afterZ < len(formatString) {
		remainder := formatString[afterZ:]
		for _, modifier := range [...]string{":M", ":S", "H", "S"} {
			if strings.HasPrefix(remainder, modifier) {
				return timezoneOptions["z"+modifier], afterZ + len(modifier)
			}
		}
	}
	return timezoneOptions["z"], afterZ
}
// parseFormatString converts a percent-style format string into a Go time
// layout.
//
// Every byte that is not the format delimiter is copied through unchanged.
// A delimiter must be followed by a specifier byte: specifiers present in
// formatSpecifierToLayout are replaced by their layout fragment, and 'z' is
// delegated to checkTZOptions, which may also consume a modifier suffix.
//
// An error is returned when the string ends right after a delimiter or when
// the specifier is unknown.
func parseFormatString(formatString string) (string, error) {
	var goLayout strings.Builder
	total := len(formatString)
	pos := 0
	for pos < total {
		current := formatString[pos]
		if current != formatDelimiter {
			// ordinary character: copy it as is
			goLayout.WriteByte(current)
			pos++
			continue
		}
		// a delimiter must be followed by a specifier character
		if pos+1 >= total {
			return "", fmt.Errorf("invalid format string, expected character after %s", string(formatDelimiter))
		}
		specifier := formatString[pos+1]
		fragment, known := formatSpecifierToLayout[specifier]
		switch {
		case known:
			goLayout.WriteString(fragment)
			pos += 2
		case specifier == 'z':
			// timezone offsets may carry a modifier, so the helper decides
			// how far to advance
			var tzFragment string
			tzFragment, pos = checkTZOptions(formatString, pos)
			goLayout.WriteString(tzFragment)
		default:
			return "", fmt.Errorf("invalid format string, unknown format specifier: %s", string(specifier))
		}
	}
	return goLayout.String(), nil
}
// ParseDateTime tries each configured layout in order with time.Parse and
// returns the parsed time together with the first layout that succeeded.
// If no layout matches, it returns the zero time, an empty layout and
// analysis.ErrInvalidDateTime.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	for i := range p.layouts {
		candidate := p.layouts[i]
		if parsed, err := time.Parse(candidate, input); err == nil {
			return parsed, candidate, nil
		}
	}
	return time.Time{}, "", analysis.ErrInvalidDateTime
}
// DateTimeParserConstructor builds a percent-style DateTimeParser from config.
//
// config["layouts"] must be a []interface{}; otherwise an error is returned.
// Each string entry is converted to a Go layout with parseFormatString, and
// the first conversion error aborts construction. Entries that are not
// strings are skipped. cache is not used.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	rawLayouts, isSlice := config["layouts"].([]interface{})
	if !isSlice {
		return nil, fmt.Errorf("must specify layouts")
	}
	goLayouts := make([]string, 0, len(rawLayouts))
	for _, raw := range rawLayouts {
		percentLayout, isString := raw.(string)
		if !isString {
			// non-string entries are ignored rather than rejected
			continue
		}
		converted, err := parseFormatString(percentLayout)
		if err != nil {
			return nil, err
		}
		goLayouts = append(goLayouts, converted)
	}
	return New(goLayouts), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/percent/percent_test.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package percent
import (
"fmt"
"reflect"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestConversionFromPercentStyle checks that parseFormatString translates
// percent-style format strings into the expected Go layouts and reports the
// expected error for malformed input.
func TestConversionFromPercentStyle(t *testing.T) {
	tests := []struct {
		name   string // subtest name
		input  string // percent-style format string
		output string // expected Go layout (ignored when err is set)
		err    error  // expected error, compared by message
	}{
		{
			name:   "basic YMD",
			input:  "%Y-%m-%d",
			output: "2006-01-02",
			err:    nil,
		},
		{
			name:   "YMD with double percent and literal T",
			input:  "%Y/%m%%%%%dT%H%M:%S",
			output: "2006/01%%02T1504:05",
			err:    nil,
		},
		{
			name:   "YMD T HMS Z z",
			input:  "%Y-%m-%dT%H:%M:%S %Z%z",
			output: "2006-01-02T15:04:05 MSTZ0700",
			err:    nil,
		},
		{
			name:   "Full month, padded day/hour, am/pm, z:M",
			input:  "%B %e, %Y %l:%i %P %z:M",
			output: "January 2, 2006 3:4 pm Z07:00",
			err:    nil,
		},
		{
			name:   "Long format with literals and timezone literal :S",
			input:  "Hour %H Minute %Mseconds %S.%N Timezone:%Z:S, Weekday %a; Day %d Month %b, Year %y",
			output: "Hour 15 Minute 04seconds 05.999999999 Timezone:MST:S, Weekday Mon; Day 02 Month Jan, Year 06",
			err:    nil,
		},
		{
			name:   "YMD T HMS with nanoseconds",
			input:  "%Y-%m-%dT%H:%M:%S.%N",
			output: "2006-01-02T15:04:05.999999999",
			err:    nil,
		},
		{
			name:   "HMS Z z",
			input:  "%H:%M:%S %Z %z",
			output: "15:04:05 MST Z0700",
			err:    nil,
		},
		{
			name:   "HMS Z z literal colon",
			input:  "%H:%M:%S %Z %z:",
			output: "15:04:05 MST Z0700:",
			err:    nil,
		},
		{
			name:   "HMS Z z:M",
			input:  "%H:%M:%S %Z %z:M",
			output: "15:04:05 MST Z07:00",
			err:    nil,
		},
		{
			name:   "HMS Z z:S",
			input:  "%H:%M:%S %Z %z:S",
			output: "15:04:05 MST Z07:00:00",
			err:    nil,
		},
		{
			name:   "HMS Z z: literal A",
			input:  "%H:%M:%S %Z %z:A",
			output: "15:04:05 MST Z0700:A",
			err:    nil,
		},
		{
			name:   "HMS Z z literal M",
			input:  "%H:%M:%S %Z %zM",
			output: "15:04:05 MST Z0700M",
			err:    nil,
		},
		{
			name:   "HMS Z zH",
			input:  "%H:%M:%S %Z %zH",
			output: "15:04:05 MST Z07",
			err:    nil,
		},
		{
			name:   "HMS Z zS",
			input:  "%H:%M:%S %Z %zS",
			output: "15:04:05 MST Z070000",
			err:    nil,
		},
		{
			name:   "Complex combination z zS z: zH",
			input:  "%H:%M:%S %Z %z%Z %zS%z:%zH",
			output: "15:04:05 MST Z0700MST Z070000Z0700:Z07",
			err:    nil,
		},
		{
			name:   "z at end",
			input:  "%Y-%m-%d %z",
			output: "2006-01-02 Z0700",
			err:    nil,
		},
		{
			name:   "z: at end",
			input:  "%Y-%m-%d %z:",
			output: "2006-01-02 Z0700:",
			err:    nil,
		},
		{
			name:   "zH at end",
			input:  "%Y-%m-%d %zH",
			output: "2006-01-02 Z07",
			err:    nil,
		},
		{
			name:   "zS at end",
			input:  "%Y-%m-%d %zS",
			output: "2006-01-02 Z070000",
			err:    nil,
		},
		{
			name:   "z:M at end",
			input:  "%Y-%m-%d %z:M",
			output: "2006-01-02 Z07:00",
			err:    nil,
		},
		{
			name:   "z:S at end",
			input:  "%Y-%m-%d %z:S",
			output: "2006-01-02 Z07:00:00",
			err:    nil,
		},
		{
			name:   "z followed by literal X",
			input:  "%Y-%m-%d %zX",
			output: "2006-01-02 Z0700X",
			err:    nil,
		},
		{
			name:   "z: followed by literal X",
			input:  "%Y-%m-%d %z:X",
			output: "2006-01-02 Z0700:X",
			err:    nil,
		},
		{
			name:   "Invalid specifier T",
			input:  "%Y-%m-%d%T%H:%M:%S %ZM",
			output: "",
			err:    fmt.Errorf("invalid format string, unknown format specifier: T"),
		},
		{
			name:   "Ends with %",
			input:  "%Y-%m-%dT%H:%M:%S %ZM%",
			output: "",
			err:    fmt.Errorf("invalid format string, expected character after %%"),
		},
		{
			name:   "Just %",
			input:  "%",
			output: "",
			err:    fmt.Errorf("invalid format string, expected character after %%"),
		},
		{
			name:   "Just %%",
			input:  "%%",
			output: "%",
			err:    nil,
		},
		{
			name:   "Unknown specifier x",
			input:  "%x",
			output: "",
			err:    fmt.Errorf("invalid format string, unknown format specifier: x"),
		},
		{
			name:   "Literal prefix",
			input:  "literal %Y",
			output: "literal 2006",
			err:    nil,
		},
		{
			name:   "Literal suffix",
			input:  "%Y literal",
			output: "2006 literal",
			err:    nil,
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			got, gotErr := parseFormatString(test.input)

			// errors are compared by message; a nil error counts as ""
			var wantMsg, gotMsg string
			if test.err != nil {
				wantMsg = test.err.Error()
			}
			if gotErr != nil {
				gotMsg = gotErr.Error()
			}
			if wantMsg != gotMsg {
				t.Fatalf("error mismatch:\nExpected error: %q\nGot error : %q", wantMsg, gotMsg)
			}
			// guards against an error being present on one side only
			if (gotErr == nil) != (test.err == nil) {
				t.Fatalf("presence mismatch: expected error %v, got error %v", test.err, gotErr)
			}
			// the layout is only meaningful on the success path
			if gotErr != nil || test.err != nil {
				return
			}
			if got != test.output {
				t.Fatalf("output mismatch: expected '%v', got '%v'", test.output, got)
			}
		})
	}
}
// TestDateTimeParser_ParseDateTime exercises ParseDateTime on parsers built
// directly from Go layouts: first/second layout matches, inputs that match
// no layout, and a parser with no layouts at all.
func TestDateTimeParser_ParseDateTime(t *testing.T) {
	// parsers built from known Go layouts
	parser1 := New([]string{"2006-01-02", "01/02/2006"}) // YYYY-MM-DD, MM/DD/YYYY
	parser2 := New([]string{"15:04:05"})                 // HH:MM:SS
	parserEmpty := New([]string{})                       // no layouts

	// expected time values for the successful cases
	time1, _ := time.Parse("2006-01-02", "2023-10-27")
	time2, _ := time.Parse("01/02/2006", "10/27/2023")
	time3, _ := time.Parse("15:04:05", "14:30:00")

	tests := []struct {
		name         string
		parser       *DateTimeParser
		input        string
		expectTime   time.Time
		expectLayout string
		expectErr    error
	}{
		{
			name:         "match first layout",
			parser:       parser1,
			input:        "2023-10-27",
			expectTime:   time1,
			expectLayout: "2006-01-02",
			expectErr:    nil,
		},
		{
			name:         "match second layout",
			parser:       parser1,
			input:        "10/27/2023",
			expectTime:   time2,
			expectLayout: "01/02/2006",
			expectErr:    nil,
		},
		{
			name:         "no matching layout",
			parser:       parser1,
			input:        "14:30:00", // matches parser2's layout, not parser1's
			expectTime:   time.Time{},
			expectLayout: "",
			expectErr:    analysis.ErrInvalidDateTime,
		},
		{
			name:         "match only layout",
			parser:       parser2,
			input:        "14:30:00",
			expectTime:   time3,
			expectLayout: "15:04:05",
			expectErr:    nil,
		},
		{
			name:         "invalid date format for layout",
			parser:       parser1,
			input:        "27-10-2023", // day-first order matches neither layout
			expectTime:   time.Time{},
			expectLayout: "",
			expectErr:    analysis.ErrInvalidDateTime, // every layout fails
		},
		{
			name:         "empty input",
			parser:       parser1,
			input:        "",
			expectTime:   time.Time{},
			expectLayout: "",
			expectErr:    analysis.ErrInvalidDateTime,
		},
		{
			name:         "parser with no layouts",
			parser:       parserEmpty,
			input:        "2023-10-27",
			expectTime:   time.Time{},
			expectLayout: "",
			expectErr:    analysis.ErrInvalidDateTime,
		},
		{
			name:         "not a date string",
			parser:       parser1,
			input:        "hello world",
			expectTime:   time.Time{},
			expectLayout: "",
			expectErr:    analysis.ErrInvalidDateTime,
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			gotTime, gotLayout, gotErr := test.parser.ParseDateTime(test.input)
			if !reflect.DeepEqual(gotErr, test.expectErr) {
				t.Fatalf("error mismatch:\nExpected: %v\nGot: %v", test.expectErr, gotErr)
			}
			// time and layout are only compared on the success path
			if test.expectErr != nil {
				return
			}
			if !gotTime.Equal(test.expectTime) {
				t.Errorf("time mismatch:\nExpected: %v\nGot: %v", test.expectTime, gotTime)
			}
			if gotLayout != test.expectLayout {
				t.Errorf("layout mismatch:\nExpected: %q\nGot: %q", test.expectLayout, gotLayout)
			}
		})
	}
}
// TestDateTimeParserConstructor checks that DateTimeParserConstructor
// converts the configured percent-style layouts into Go layouts and rejects
// missing, mistyped or malformed configuration.
func TestDateTimeParserConstructor(t *testing.T) {
	tests := []struct {
		name          string
		config        map[string]interface{}
		expectLayouts []string // Go layouts expected on the constructed parser
		expectErr     error    // compared by message
	}{
		{
			name: "valid config with multiple layouts",
			config: map[string]interface{}{
				"layouts": []interface{}{"%Y-%m-%d", "%H:%M:%S %Z"},
			},
			expectLayouts: []string{"2006-01-02", "15:04:05 MST"},
			expectErr:     nil,
		},
		{
			name: "valid config with single layout",
			config: map[string]interface{}{
				"layouts": []interface{}{"%Y/%m/%d %z:M"},
			},
			expectLayouts: []string{"2006/01/02 Z07:00"},
			expectErr:     nil,
		},
		{
			name: "valid config with complex layout",
			config: map[string]interface{}{
				"layouts": []interface{}{"%a, %d %b %Y %H:%M:%S %zH"},
			},
			expectLayouts: []string{"Mon, 02 Jan 2006 15:04:05 Z07"},
			expectErr:     nil,
		},
		{
			name: "config missing layouts key",
			config: map[string]interface{}{
				"other_key": "value",
			},
			expectLayouts: nil,
			expectErr:     fmt.Errorf("must specify layouts"),
		},
		{
			name: "config layouts not a slice",
			config: map[string]interface{}{
				"layouts": "not-a-slice", // a string instead of a slice
			},
			expectLayouts: nil,
			expectErr:     fmt.Errorf("must specify layouts"),
		},
		{
			name: "config layouts contains non-string",
			config: map[string]interface{}{
				"layouts": []interface{}{"%Y-%m-%d", 123},
			},
			// the string entry is converted, the int entry is ignored
			expectLayouts: []string{"2006-01-02"},
			expectErr:     nil,
		},
		{
			name: "config layouts contains invalid percent format",
			config: map[string]interface{}{
				"layouts": []interface{}{"%Y-%m-%d", "%x"}, // %x is not a known specifier
			},
			expectLayouts: nil,
			expectErr:     fmt.Errorf("invalid format string, unknown format specifier: x"),
		},
		{
			name: "config layouts contains format ending in %",
			config: map[string]interface{}{
				"layouts": []interface{}{"%Y-%m-%d", "%H:%M:%"},
			},
			expectLayouts: nil,
			expectErr:     fmt.Errorf("invalid format string, expected character after %%"),
		},
		{
			name: "config with empty layouts slice",
			config: map[string]interface{}{
				"layouts": []interface{}{},
			},
			expectLayouts: []string{}, // an empty slice is expected, not nil
			expectErr:     nil,
		},
		{
			name:          "nil config",
			config:        nil,
			expectLayouts: nil,
			expectErr:     fmt.Errorf("must specify layouts"),
		},
	}
	for _, test := range tests {
		t.Run(test.name, func(t *testing.T) {
			// the constructor does not use the cache, so nil is passed
			built, gotErr := DateTimeParserConstructor(test.config, nil)

			// errors are compared by message; a nil error counts as ""
			var wantMsg, gotMsg string
			if test.expectErr != nil {
				wantMsg = test.expectErr.Error()
			}
			if gotErr != nil {
				gotMsg = gotErr.Error()
			}
			if wantMsg != gotMsg {
				t.Fatalf("error mismatch:\nExpected: %q\nGot: %q", wantMsg, gotMsg)
			}
			// layouts are only inspected on the success path
			if test.expectErr != nil {
				return
			}
			// a type assertion is needed to reach the unexported layouts field
			parser, isParser := built.(*DateTimeParser)
			if !isParser {
				t.Fatalf("constructor did not return a *DateTimeParser")
			}
			if !reflect.DeepEqual(parser.layouts, test.expectLayouts) {
				t.Errorf("layouts mismatch:\nExpected: %v\nGot: %v", test.expectLayouts, parser.layouts)
			}
		})
	}
}
================================================
FILE: analysis/datetime/sanitized/sanitized.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sanitized
import (
"fmt"
"regexp"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which the sanitized Go-layout date/time parser is registered.
const Name = "sanitizedgo"
// validMagicNumbers is the set of layout fragments accepted by validateLayout
// once a layout has been stripped with layoutStripRegex and split with
// layoutSplitRegex. The entries are components of the reference-time layout
// of the Go time package (see https://pkg.go.dev/time#pkg-constants).
var validMagicNumbers = map[string]struct{}{
"2006": {},
"06": {}, // Year
"01": {},
"1": {},
"_1": {},
"January": {},
"Jan": {}, // Month
"02": {},
"2": {},
"_2": {},
"__2": {},
"002": {},
"Monday": {},
"Mon": {}, // Day
"15": {},
"3": {},
"03": {}, // Hour
"4": {},
"04": {}, // Minute
"5": {},
"05": {}, // Second
// numeric timezone offset pieces left over after splitting on '+', '-', 'Z' and ':'
"0700": {},
"070000": {},
"07": {},
"00": {},
// the empty string is accepted as well; the split yields it between
// adjacent separators and when the stripped layout is empty
"": {},
}
// layoutSplitRegex matches a single separator character (sign, space, colon,
// 'T', 'Z', comma, dot, brackets, quotes, slashes and other punctuation);
// validateLayout splits a layout on it.
var layoutSplitRegex = regexp.MustCompile("[\\+\\-= :T,Z\\.<>;\\?!`~@#$%\\^&\\*|'\"\\(\\){}\\[\\]/\\\\]")
// layoutStripRegex matches "PM", "pm", "MST" and a dot followed by a run of
// 9s or 0s; validateLayout removes these before splitting the layout.
var layoutStripRegex = regexp.MustCompile(`PM|pm|\.9+|\.0+|MST`)
// DateTimeParser parses date/time strings against a fixed list of Go time
// layouts.
type DateTimeParser struct {
	// layouts holds the Go time layouts, in the order ParseDateTime tries them.
	layouts []string
}

// New returns a DateTimeParser that uses the given Go time layouts. The slice
// is stored as passed in; it is not copied and not validated here.
func New(layouts []string) *DateTimeParser {
	parser := new(DateTimeParser)
	parser.layouts = layouts
	return parser
}
// ParseDateTime tries each configured layout in order with time.Parse and
// returns the parsed time together with the first layout that succeeded.
// If no layout matches, it returns the zero time, an empty layout and
// analysis.ErrInvalidDateTime.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	for i := range p.layouts {
		candidate := p.layouts[i]
		if parsed, err := time.Parse(candidate, input); err == nil {
			return parsed, candidate, nil
		}
	}
	return time.Time{}, "", analysis.ErrInvalidDateTime
}
// validateLayout reports whether layout is built only from the layout
// constants of the Go time package (https://pkg.go.dev/time#pkg-constants),
// so that it can safely be handed to time.Parse.
//
// Validation runs in two steps:
//  1. Fragments matched by layoutStripRegex ("PM", "pm", "MST" and fractional
//     seconds such as ".999" or ".000") are removed first, because they can
//     sit directly next to another component (e.g. "03:04PM") where the split
//     below could not separate them.
//  2. The remainder is split on the separator characters of layoutSplitRegex
//     and every resulting piece must be a key of validMagicNumbers.
func validateLayout(layout string) bool {
	stripped := layoutStripRegex.ReplaceAllString(layout, "")
	for _, piece := range layoutSplitRegex.Split(stripped, -1) {
		if _, known := validMagicNumbers[piece]; !known {
			return false
		}
	}
	return true
}
// DateTimeParserConstructor builds a DateTimeParser from config.
//
// config["layouts"] must be a []interface{}; otherwise an error is returned.
// Each string entry must pass validateLayout, and the first invalid layout
// aborts construction with an error naming it. Entries that are not strings
// are skipped. cache is not used.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	rawLayouts, isSlice := config["layouts"].([]interface{})
	if !isSlice {
		return nil, fmt.Errorf("must specify layouts")
	}
	var accepted []string
	for _, raw := range rawLayouts {
		candidate, isString := raw.(string)
		if !isString {
			// non-string entries are ignored rather than rejected
			continue
		}
		if !validateLayout(candidate) {
			return nil, fmt.Errorf("invalid datetime parser layout: %s,"+
				" please refer to https://pkg.go.dev/time#pkg-constants for supported"+
				" layouts", candidate)
		}
		accepted = append(accepted, candidate)
	}
	return New(accepted), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/sanitized/sanitized_test.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sanitized
import (
"reflect"
"testing"
)
// TestLayoutValidatorRegex checks the two regular expressions used by
// validateLayout: layoutSplitRegex must split a layout on every separator
// character, and layoutStripRegex must remove "PM"/"pm", "MST" and fractional
// second markers.
func TestLayoutValidatorRegex(t *testing.T) {
	splitRegexTests := []struct {
		input  string
		output []string
	}{
		{
			input:  "2014-08-03",
			output: []string{"2014", "08", "03"},
		},
		{
			input:  "2014-08-03T15:59:30",
			output: []string{"2014", "08", "03", "15", "59", "30"},
		},
		{
			input:  "2014.08-03 15/59`30",
			output: []string{"2014", "08", "03", "15", "59", "30"},
		},
		{
			input:  "2014/08/03T15:59:30Z08:00",
			output: []string{"2014", "08", "03", "15", "59", "30", "08", "00"},
		},
		{
			input:  "2014\\08|03T15=59.30.999999999+08*00",
			output: []string{"2014", "08", "03", "15", "59", "30", "999999999", "08", "00"},
		},
		{
			input:  "2006-01-02T15:04:05.999999999Z07:00",
			output: []string{"2006", "01", "02", "15", "04", "05", "999999999", "07", "00"},
		},
		{
			input: "A-B C:DTE,FZG.HJ;K?L!M`N~O@P#Q$R%S^U&V*W|X'Y\"A(B)C{D}E[F]G/H\\I+J=L",
			output: []string{"A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P",
				"Q", "R", "S", "U", "V", "W", "X", "Y", "A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "L"},
		},
	}
	for _, test := range splitRegexTests {
		t.Run(test.input, func(t *testing.T) {
			actualOutput := layoutSplitRegex.Split(test.input, -1)
			if !reflect.DeepEqual(actualOutput, test.output) {
				t.Fatalf("expected output %v, got %v", test.output, actualOutput)
			}
		})
	}

	stripRegexTests := []struct {
		input  string
		output string
	}{
		{
			input:  "3PM",
			output: "3",
		},
		{
			input:  "3.0PM",
			output: "3",
		},
		{
			// "AM" is not stripped; only the fractional part is removed
			input:  "3.9AM",
			output: "3AM",
		},
		{
			input:  "3.999999999pm",
			output: "3",
		},
		{
			input:  "2006-01-02T15:04:05.999999999Z07:00MST",
			output: "2006-01-02T15:04:05Z07:00",
		},
		{
			input:  "Jan _2 15:04:05.0000000+07:00MST",
			output: "Jan _2 15:04:05+07:00",
		},
		{
			input:  "15:04:05.99PM+07:00MST",
			output: "15:04:05+07:00",
		},
	}
	// Each loop names its regex directly; the earlier shared alias was
	// reassigned here but never read again (an ineffectual assignment).
	for _, test := range stripRegexTests {
		t.Run(test.input, func(t *testing.T) {
			actualOutput := layoutStripRegex.ReplaceAllString(test.input, "")
			if actualOutput != test.output {
				t.Fatalf("expected output %v, got %v", test.output, actualOutput)
			}
		})
	}
}
================================================
FILE: analysis/datetime/timestamp/microseconds/microseconds.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package microseconds
import (
"math"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which the microsecond unix timestamp parser is
// registered; ParseDateTime also returns it as the matched layout.
const Name = "unix_micro"

// DateTimeParser parses decimal unix timestamps given in microseconds.
type DateTimeParser struct{}

// minBound and maxBound delimit the microsecond timestamps accepted by
// ParseDateTime: the int64 range divided by 1000.
// NOTE(review): presumably chosen so the value still fits in an int64 once
// expressed in nanoseconds — confirm against callers.
var (
	minBound int64 = math.MinInt64 / 1000
	maxBound int64 = math.MaxInt64 / 1000
)
// ParseDateTime interprets input as a base-10 count of microseconds since the
// UNIX epoch.
//
// It returns analysis.ErrInvalidTimestampString when input is not a valid
// int64, and analysis.ErrInvalidTimestampRange when the value lies outside
// [minBound, maxBound]. On success the time comes from time.UnixMicro and
// Name is returned as the layout.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	micros, parseErr := strconv.ParseInt(input, 10, 64)
	switch {
	case parseErr != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case micros < minBound, micros > maxBound:
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.UnixMicro(micros), Name, nil
}
// DateTimeParserConstructor returns a new microsecond timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return new(DateTimeParser), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/timestamp/milliseconds/milliseconds.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package milliseconds
import (
"math"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which the millisecond unix timestamp parser is
// registered; ParseDateTime also returns it as the matched layout.
const Name = "unix_milli"

// DateTimeParser parses decimal unix timestamps given in milliseconds.
type DateTimeParser struct{}

// minBound and maxBound delimit the millisecond timestamps accepted by
// ParseDateTime: the int64 range divided by 1000000.
// NOTE(review): presumably chosen so the value still fits in an int64 once
// expressed in nanoseconds — confirm against callers.
var (
	minBound int64 = math.MinInt64 / 1000000
	maxBound int64 = math.MaxInt64 / 1000000
)
// ParseDateTime interprets input as a base-10 count of milliseconds since the
// UNIX epoch.
//
// It returns analysis.ErrInvalidTimestampString when input is not a valid
// int64, and analysis.ErrInvalidTimestampRange when the value lies outside
// [minBound, maxBound]. On success the time comes from time.UnixMilli and
// Name is returned as the layout.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	millis, parseErr := strconv.ParseInt(input, 10, 64)
	switch {
	case parseErr != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case millis < minBound, millis > maxBound:
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.UnixMilli(millis), Name, nil
}
// DateTimeParserConstructor returns a new millisecond timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return new(DateTimeParser), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/timestamp/nanoseconds/nanoseconds.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nanoseconds
import (
"math"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which the nanosecond unix timestamp parser is
// registered; ParseDateTime also returns it as the matched layout.
const Name = "unix_nano"

// DateTimeParser parses decimal unix timestamps given in nanoseconds.
type DateTimeParser struct{}

// minBound and maxBound delimit the nanosecond timestamps accepted by
// ParseDateTime; they span the entire int64 range.
var (
	minBound int64 = math.MinInt64
	maxBound int64 = math.MaxInt64
)
// ParseDateTime interprets input as a base-10 count of nanoseconds since the
// UNIX epoch.
//
// It returns analysis.ErrInvalidTimestampString when input is not a valid
// int64, and analysis.ErrInvalidTimestampRange when the value lies outside
// [minBound, maxBound]. On success the time comes from time.Unix(0, ns) and
// Name is returned as the layout.
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	nanos, parseErr := strconv.ParseInt(input, 10, 64)
	switch {
	case parseErr != nil:
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	case nanos < minBound, nanos > maxBound:
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.Unix(0, nanos), Name, nil
}
// DateTimeParserConstructor returns a new nanosecond timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	return new(DateTimeParser), nil
}
// init registers DateTimeParserConstructor under Name and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/datetime/timestamp/seconds/seconds.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package seconds
import (
"math"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the key under which the second-resolution unix timestamp parser is
// registered; ParseDateTime also returns it as the matched layout.
const Name = "unix_sec"

// DateTimeParser parses decimal unix timestamps given in seconds.
type DateTimeParser struct{}

// minBound and maxBound delimit the second timestamps accepted by
// ParseDateTime: the int64 range divided by 1000000000.
// NOTE(review): presumably chosen so the value still fits in an int64 once
// expressed in nanoseconds — confirm against callers.
var (
	minBound int64 = math.MinInt64 / 1000000000
	maxBound int64 = math.MaxInt64 / 1000000000
)
// ParseDateTime interprets input as a base-10 integer number of seconds since
// the UNIX epoch.
//
// On success it returns the corresponding time together with Name as the
// layout identifier. It returns analysis.ErrInvalidTimestampString when input
// is not a valid 64-bit integer, and analysis.ErrInvalidTimestampRange when
// the value lies outside [minBound, maxBound].
func (p *DateTimeParser) ParseDateTime(input string) (time.Time, string, error) {
	secs, err := strconv.ParseInt(input, 10, 64)
	if err != nil {
		return time.Time{}, "", analysis.ErrInvalidTimestampString
	}
	if inRange := minBound <= secs && secs <= maxBound; !inRange {
		return time.Time{}, "", analysis.ErrInvalidTimestampRange
	}
	return time.Unix(secs, 0), Name, nil
}
// DateTimeParserConstructor builds a new second-resolution timestamp parser.
// Neither config nor cache is consulted, and the returned error is always nil.
func DateTimeParserConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.DateTimeParser, error) {
	var parser DateTimeParser
	return &parser, nil
}
// init registers DateTimeParserConstructor with the registry under Name and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterDateTimeParser(Name, DateTimeParserConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/freq.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
index "github.com/blevesearch/bleve_index_api"
)
// TokenFrequency groups tokens by term and returns one index.TokenFreq per
// distinct term, keyed by the term's string form.
//
// When options.IncludeTermVectors() is true, each entry also records one
// index.TokenLocation per occurrence (start, end, position and the supplied
// arrayPositions). Unless options.SkipFreqNorm() is true, each occurrence
// increments the entry's frequency by one.
func TokenFrequency(tokens TokenStream, arrayPositions []uint64, options index.FieldIndexingOptions) index.TokenFrequencies {
	freqs := make(map[string]*index.TokenFreq, len(tokens))

	if !options.IncludeTermVectors() {
		// No locations requested: only track terms and their counts.
		for _, tok := range tokens {
			entry, seen := freqs[string(tok.Term)]
			if !seen {
				entry = &index.TokenFreq{Term: tok.Term}
				freqs[string(tok.Term)] = entry
			}
			if !options.SkipFreqNorm() {
				entry.SetFrequency(entry.Frequency() + 1)
			}
		}
		return freqs
	}

	// One backing slice holds every location, so a single allocation serves
	// all tokens; entries keep pointers into it.
	locations := make([]index.TokenLocation, len(tokens))
	for i, tok := range tokens {
		loc := &locations[i]
		*loc = index.TokenLocation{
			ArrayPositions: arrayPositions,
			Start:          tok.Start,
			End:            tok.End,
			Position:       tok.Position,
		}

		entry, seen := freqs[string(tok.Term)]
		if seen {
			entry.Locations = append(entry.Locations, loc)
		} else {
			entry = &index.TokenFreq{
				Term:      tok.Term,
				Locations: []*index.TokenLocation{loc},
			}
			freqs[string(tok.Term)] = entry
		}
		if !options.SkipFreqNorm() {
			entry.SetFrequency(entry.Frequency() + 1)
		}
	}
	return freqs
}
================================================
FILE: analysis/freq_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
index "github.com/blevesearch/bleve_index_api"
"reflect"
"testing"
)
// TestTokenFrequency checks that two occurrences of the same term collapse
// into a single entry with frequency 2 and one location per occurrence.
func TestTokenFrequency(t *testing.T) {
	input := TokenStream{
		&Token{Term: []byte("water"), Position: 1, Start: 0, End: 5},
		&Token{Term: []byte("water"), Position: 2, Start: 6, End: 11},
	}

	water := &index.TokenFreq{
		Term: []byte("water"),
		Locations: []*index.TokenLocation{
			{Position: 1, Start: 0, End: 5},
			{Position: 2, Start: 6, End: 11},
		},
	}
	water.SetFrequency(2)
	want := index.TokenFrequencies{"water": water}

	got := TokenFrequency(input, nil, index.IncludeTermVectors)
	if !reflect.DeepEqual(got, want) {
		t.Errorf("expected %#v, got %#v", want, got)
	}
}
================================================
FILE: analysis/lang/ar/analyzer_ar.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the Arabic analyzer is registered.
const AnalyzerName = "ar"
// AnalyzerConstructor assembles the Arabic analyzer: the unicode tokenizer
// followed, in order, by the lowercase filter, an NFKC unicode normalization
// filter, the Arabic stop word filter, the Arabic normalization filter and
// the Arabic stemmer. Named components are resolved through cache; the first
// lookup error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	lowercaseFilter, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	nfkcFilter := unicodenorm.MustNewUnicodeNormalizeFilter(unicodenorm.NFKC)

	stopFilter, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}

	arabicNormalizer, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}

	arabicStemmer, err := cache.TokenFilterNamed(StemmerName)
	if err != nil {
		return nil, err
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer: unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{
			lowercaseFilter,
			nfkcFilter,
			stopFilter,
			arabicNormalizer,
			arabicStemmer,
		},
	}, nil
}
// init registers AnalyzerConstructor with the registry under AnalyzerName and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ar/analyzer_ar_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestArabicAnalyzer runs the registered Arabic analyzer over a table of
// inputs and compares the produced token stream (term, position and byte
// offsets) with the expected one.
func TestArabicAnalyzer(t *testing.T) {
	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{
			input: []byte("كبير"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كبير"), Position: 1, Start: 0, End: 8},
			},
		},
		// feminine marker
		{
			input: []byte("كبيرة"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كبير"), Position: 1, Start: 0, End: 10},
			},
		},
		{
			input: []byte("مشروب"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("مشروب"), Position: 1, Start: 0, End: 10},
			},
		},
		// plural -at
		{
			input: []byte("مشروبات"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("مشروب"), Position: 1, Start: 0, End: 14},
			},
		},
		// plural -in
		{
			input: []byte("أمريكيين"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("امريك"), Position: 1, Start: 0, End: 16},
			},
		},
		// singular with bare alif
		{
			input: []byte("امريكي"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("امريك"), Position: 1, Start: 0, End: 12},
			},
		},
		{
			input: []byte("كتاب"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كتاب"), Position: 1, Start: 0, End: 8},
			},
		},
		// definite article
		{
			input: []byte("الكتاب"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("كتاب"), Position: 1, Start: 0, End: 12},
			},
		},
		{
			input: []byte("ما ملكت أيمانكم"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("ملكت"), Position: 2, Start: 5, End: 13},
				&analysis.Token{Term: []byte("ايمانكم"), Position: 3, Start: 14, End: 28},
			},
		},
		// stopwords
		{
			input: []byte("الذين ملكت أيمانكم"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("ملكت"), Position: 2, Start: 11, End: 19},
				&analysis.Token{Term: []byte("ايمانكم"), Position: 3, Start: 20, End: 34},
			},
		},
		// presentation form normalization
		{
			input: []byte("ﺍﻟﺴﻼﻢ"),
			output: analysis.TokenStream{
				&analysis.Token{Term: []byte("سلام"), Position: 1, Start: 0, End: 15},
			},
		},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for i := range cases {
		want := cases[i].output
		got := analyzer.Analyze(cases[i].input)
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %v, got %v", want, got)
		// Also dump the first term of each stream as hex to expose
		// differences that look identical when rendered as text.
		t.Errorf("expected % x, got % x", want[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/ar/arabic_normalize.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// NormalizeName is the name under which the Arabic normalization token filter
// is registered.
const NormalizeName = "normalize_ar"
// Unicode code points of the Arabic letters and marks that normalize
// rewrites or removes.
const (
// Alef is the replacement for AlefMadda, AlefHamzaAbove and AlefHamzaBelow.
Alef = '\u0627'
AlefMadda = '\u0622'
AlefHamzaAbove = '\u0623'
AlefHamzaBelow = '\u0625'
// Yeh is the replacement for DotlessYeh.
Yeh = '\u064A'
DotlessYeh = '\u0649'
// TehMarbuta is replaced with Heh.
TehMarbuta = '\u0629'
Heh = '\u0647'
// Tatweel and the marks below are deleted by normalize.
Tatweel = '\u0640'
Fathatan = '\u064B'
Dammatan = '\u064C'
Kasratan = '\u064D'
Fatha = '\u064E'
Damma = '\u064F'
Kasra = '\u0650'
Shadda = '\u0651'
Sukun = '\u0652'
)
// ArabicNormalizeFilter is a stateless token filter that normalizes Arabic
// token terms.
type ArabicNormalizeFilter struct{}

// NewArabicNormalizeFilter returns a ready-to-use ArabicNormalizeFilter.
func NewArabicNormalizeFilter() *ArabicNormalizeFilter {
	return new(ArabicNormalizeFilter)
}
// Filter replaces the term of every token in input with its normalized form.
// The tokens are modified in place and the same stream is returned.
func (s *ArabicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = normalize(input[i].Term)
	}
	return input
}
// normalize decodes input into runes and applies the Arabic normalization
// rules: the alef variants (madda, hamza above, hamza below) become Alef,
// DotlessYeh becomes Yeh, TehMarbuta becomes Heh, and Tatweel plus the marks
// Fathatan, Dammatan, Kasratan, Fatha, Damma, Kasra, Shadda and Sukun are
// deleted. The result is re-encoded with analysis.BuildTermFromRunes.
func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for i := 0; i < len(runes); {
		switch runes[i] {
		case AlefMadda, AlefHamzaAbove, AlefHamzaBelow:
			runes[i] = Alef
		case DotlessYeh:
			runes[i] = Yeh
		case TehMarbuta:
			runes[i] = Heh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			runes = analysis.DeleteRune(runes, i)
			// Stay on the same index: it now holds the next rune.
			continue
		}
		i++
	}
	return analysis.BuildTermFromRunes(runes)
}
// NormalizerFilterConstructor builds a new Arabic normalization filter.
// Neither config nor cache is consulted, and the returned error is always nil.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewArabicNormalizeFilter()
	return filter, nil
}
// init registers NormalizerFilterConstructor with the registry under
// NormalizeName and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ar/arabic_normalize_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestArabicNormalizeFilter feeds single-token streams through the Arabic
// normalization filter and compares the resulting term with the expected one.
func TestArabicNormalizeFilter(t *testing.T) {
	// stream wraps one term in a single-token stream.
	stream := func(term []byte) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: term}}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{ // AlifMadda
			input:  stream([]byte("آجن")),
			output: stream([]byte("اجن")),
		},
		{ // AlifHamzaAbove
			input:  stream([]byte("أحمد")),
			output: stream([]byte("احمد")),
		},
		{ // AlifHamzaBelow
			input:  stream([]byte("إعاذ")),
			output: stream([]byte("اعاذ")),
		},
		{ // AlifMaksura
			input:  stream([]byte("بنى")),
			output: stream([]byte("بني")),
		},
		{ // TehMarbuta
			input:  stream([]byte("فاطمة")),
			output: stream([]byte("فاطمه")),
		},
		{ // Tatweel
			input:  stream([]byte("روبرـــــت")),
			output: stream([]byte("روبرت")),
		},
		{ // Fatha
			input:  stream([]byte("مَبنا")),
			output: stream([]byte("مبنا")),
		},
		{ // Kasra
			input:  stream([]byte("علِي")),
			output: stream([]byte("علي")),
		},
		{ // Damma
			input:  stream([]byte("بُوات")),
			output: stream([]byte("بوات")),
		},
		{ // Fathatan
			input:  stream([]byte("ولداً")),
			output: stream([]byte("ولدا")),
		},
		{ // Kasratan
			input:  stream([]byte("ولدٍ")),
			output: stream([]byte("ولد")),
		},
		{ // Dammatan
			input:  stream([]byte("ولدٌ")),
			output: stream([]byte("ولد")),
		},
		{ // Sukun
			input:  stream([]byte("نلْسون")),
			output: stream([]byte("نلسون")),
		},
		{ // Shaddah
			input:  stream([]byte("هتميّ")),
			output: stream([]byte("هتمي")),
		},
		{ // empty
			input:  stream([]byte("")),
			output: stream([]byte("")),
		},
	}

	filter := NewArabicNormalizeFilter()
	for i := range cases {
		want := cases[i].output
		got := filter.Filter(cases[i].input)
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %#v, got %#v", want, got)
		// Hex dump of the terms exposes differences that render identically.
		t.Errorf("expected % x, got % x", want[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/ar/stemmer_ar.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StemmerName is the name under which the Arabic stemmer token filter is
// registered.
const StemmerName = "stemmer_ar"
// prefixes lists the prefixes stem may strip. They are tried in this order
// and at most one, the first that matches, is removed.
// These were obtained from org.apache.lucene.analysis.ar.ArabicStemmer
var prefixes = [][]rune{
[]rune("ال"),
[]rune("وال"),
[]rune("بال"),
[]rune("كال"),
[]rune("فال"),
[]rune("لل"),
[]rune("و"),
}
// suffixes lists the suffixes stem may strip. They are tried once each, in
// this order, and every one that matches is removed.
var suffixes = [][]rune{
[]rune("ها"),
[]rune("ان"),
[]rune("ات"),
[]rune("ون"),
[]rune("ين"),
[]rune("يه"),
[]rune("ية"),
[]rune("ه"),
[]rune("ة"),
[]rune("ي"),
}
// ArabicStemmerFilter is a stateless token filter that stems Arabic token
// terms.
type ArabicStemmerFilter struct{}

// NewArabicStemmerFilter returns a ready-to-use ArabicStemmerFilter.
func NewArabicStemmerFilter() *ArabicStemmerFilter {
	return new(ArabicStemmerFilter)
}
// Filter replaces the term of every token in input with its stemmed form.
// The tokens are modified in place and the same stream is returned.
func (s *ArabicStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = stem(input[i].Term)
	}
	return input
}
// canStemPrefix reports whether prefix may be stripped from input: input must
// start with prefix and at least two runes must remain afterwards. A
// single-rune prefix additionally requires input to be at least four runes
// long.
func canStemPrefix(input, prefix []rune) bool {
	remaining := len(input) - len(prefix)
	switch {
	case len(prefix) == 1 && len(input) < 4:
		// A one-rune prefix (wa-) needs at least three runes left over.
		return false
	case remaining < 2:
		// Every other prefix needs at least two runes left over.
		return false
	}
	for i, r := range prefix {
		if input[i] != r {
			return false
		}
	}
	return true
}
// canStemSuffix reports whether suffix may be stripped from input: input must
// end with suffix and at least two runes must remain afterwards.
func canStemSuffix(input, suffix []rune) bool {
	stemLen := len(input) - len(suffix)
	if stemLen < 2 {
		return false
	}
	tail := input[stemLen:]
	for i, r := range suffix {
		if tail[i] != r {
			return false
		}
	}
	return true
}
// stem decodes input into runes, strips at most one prefix (the first entry
// of prefixes accepted by canStemPrefix), then walks suffixes once in order
// and strips every entry accepted by canStemSuffix. The remaining runes are
// re-encoded with analysis.BuildTermFromRunes.
func stem(input []byte) []byte {
	word := bytes.Runes(input)

	// Only the first matching prefix is removed.
	for i := range prefixes {
		if prefix := prefixes[i]; canStemPrefix(word, prefix) {
			word = word[len(prefix):]
			break
		}
	}

	// Several suffixes may be removed, each checked against the word as
	// already shortened by the earlier ones.
	for i := range suffixes {
		suffix := suffixes[i]
		if !canStemSuffix(word, suffix) {
			continue
		}
		word = word[:len(word)-len(suffix)]
	}

	return analysis.BuildTermFromRunes(word)
}
// StemmerFilterConstructor builds a new Arabic stemmer filter. Neither config
// nor cache is consulted, and the returned error is always nil.
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewArabicStemmerFilter()
	return filter, nil
}
// init registers StemmerFilterConstructor with the registry under StemmerName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ar/stemmer_ar_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestArabicStemmerFilter feeds single-token streams through the Arabic
// stemmer filter and compares the resulting term with the expected one.
func TestArabicStemmerFilter(t *testing.T) {
	// stream wraps one term in a single-token stream.
	stream := func(term []byte) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: term}}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{ // AlPrefix
			input:  stream([]byte("الحسن")),
			output: stream([]byte("حسن")),
		},
		{ // WalPrefix
			input:  stream([]byte("والحسن")),
			output: stream([]byte("حسن")),
		},
		{ // BalPrefix
			input:  stream([]byte("بالحسن")),
			output: stream([]byte("حسن")),
		},
		{ // KalPrefix
			input:  stream([]byte("كالحسن")),
			output: stream([]byte("حسن")),
		},
		{ // FalPrefix
			input:  stream([]byte("فالحسن")),
			output: stream([]byte("حسن")),
		},
		{ // LlPrefix
			input:  stream([]byte("للاخر")),
			output: stream([]byte("اخر")),
		},
		{ // WaPrefix
			input:  stream([]byte("وحسن")),
			output: stream([]byte("حسن")),
		},
		{ // AhSuffix
			input:  stream([]byte("زوجها")),
			output: stream([]byte("زوج")),
		},
		{ // AnSuffix
			input:  stream([]byte("ساهدان")),
			output: stream([]byte("ساهد")),
		},
		{ // AtSuffix
			input:  stream([]byte("ساهدات")),
			output: stream([]byte("ساهد")),
		},
		{ // WnSuffix
			input:  stream([]byte("ساهدون")),
			output: stream([]byte("ساهد")),
		},
		{ // YnSuffix
			input:  stream([]byte("ساهدين")),
			output: stream([]byte("ساهد")),
		},
		{ // YhSuffix
			input:  stream([]byte("ساهديه")),
			output: stream([]byte("ساهد")),
		},
		{ // YpSuffix
			input:  stream([]byte("ساهدية")),
			output: stream([]byte("ساهد")),
		},
		{ // HSuffix
			input:  stream([]byte("ساهده")),
			output: stream([]byte("ساهد")),
		},
		{ // PSuffix
			input:  stream([]byte("ساهدة")),
			output: stream([]byte("ساهد")),
		},
		{ // YSuffix
			input:  stream([]byte("ساهدي")),
			output: stream([]byte("ساهد")),
		},
		{ // ComboPrefSuf
			input:  stream([]byte("وساهدون")),
			output: stream([]byte("ساهد")),
		},
		{ // ComboSuf
			input:  stream([]byte("ساهدهات")),
			output: stream([]byte("ساهد")),
		},
		{ // Shouldn't Stem
			input:  stream([]byte("الو")),
			output: stream([]byte("الو")),
		},
		{ // NonArabic
			input:  stream([]byte("English")),
			output: stream([]byte("English")),
		},
		{
			input:  stream([]byte("سلام")),
			output: stream([]byte("سلام")),
		},
		{
			input:  stream([]byte("السلام")),
			output: stream([]byte("سلام")),
		},
		{
			input:  stream([]byte("سلامة")),
			output: stream([]byte("سلام")),
		},
		{
			input:  stream([]byte("السلامة")),
			output: stream([]byte("سلام")),
		},
		{
			input:  stream([]byte("الوصل")),
			output: stream([]byte("وصل")),
		},
		{
			input:  stream([]byte("والصل")),
			output: stream([]byte("صل")),
		},
		{ // Empty
			input:  stream([]byte("")),
			output: stream([]byte("")),
		},
	}

	filter := NewArabicStemmerFilter()
	for i := range cases {
		want := cases[i].output
		got := filter.Filter(cases[i].input)
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %#v, got %#v", want, got)
		// Hex dump of the terms exposes differences that render identically.
		t.Errorf("expected % x, got % x", want[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/ar/stop_filter_ar.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ar
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Arabic stop word filter from the
// token map that cache holds under StopName. A lookup error is returned
// unchanged; config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, err := cache.TokenMapNamed(StopName)
	if err == nil {
		return stop.NewStopTokensFilter(stopWords), nil
	}
	return nil, err
}
// init registers StopTokenFilterConstructor with the registry under StopName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ar/stop_words_ar.go
================================================
package ar
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which both the Arabic stop word token map and
// the Arabic stop token filter are registered.
const StopName = "stop_ar"
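// ArabicStopWords is the Arabic stop word list. Its content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
// Every backtick (`) in the original was replaced with an apostrophe (') so
// that the list can be embedded in a Go raw string literal.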
var ArabicStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Cleaned on October 11, 2009 (not normalized, so use before normalization)
# This means that when modifying this list, you might need to add some
# redundant entries, for example containing forms with both أ and ا
من
ومن
منها
منه
في
وفي
فيها
فيه
و
ف
ثم
او
أو
ب
بها
به
ا
أ
اى
اي
أي
أى
لا
ولا
الا
ألا
إلا
لكن
ما
وما
كما
فما
عن
مع
اذا
إذا
ان
أن
إن
انها
أنها
إنها
انه
أنه
إنه
بان
بأن
فان
فأن
وان
وأن
وإن
التى
التي
الذى
الذي
الذين
الى
الي
إلى
إلي
على
عليها
عليه
اما
أما
إما
ايضا
أيضا
كل
وكل
لم
ولم
لن
ولن
هى
هي
هو
وهى
وهي
وهو
فهى
فهي
فهو
انت
أنت
لك
لها
له
هذه
هذا
تلك
ذلك
هناك
كانت
كان
يكون
تكون
وكانت
وكان
غير
بعض
قد
نحو
بين
بينما
منذ
ضمن
حيث
الان
الآن
خلال
بعد
قبل
حتى
عند
عندما
لدى
جميع
`)
// TokenMapConstructor creates a token map and loads ArabicStopWords into it.
// The map is returned together with any error reported by LoadBytes; config
// and cache are not consulted.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(ArabicStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor with the registry under StopName and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/bg/stop_filter_bg.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bg
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Bulgarian stop word filter from the
// token map that cache holds under StopName. A lookup error is returned
// unchanged; config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, err := cache.TokenMapNamed(StopName)
	if err == nil {
		return stop.NewStopTokensFilter(stopWords), nil
	}
	return nil, err
}
// init registers StopTokenFilterConstructor with the registry under StopName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/bg/stop_words_bg.go
================================================
package bg
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which both the Bulgarian stop word token map and
// the Bulgarian stop token filter are registered.
const StopName = "stop_bg"
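// BulgarianStopWords is the Bulgarian stop word list. Its content was
// obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Every backtick (`) in the original was replaced with an apostrophe (') so
// that the list can be embedded in a Go raw string literal.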
var BulgarianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
а
аз
ако
ала
бе
без
беше
би
бил
била
били
било
близо
бъдат
бъде
бяха
в
вас
ваш
ваша
вероятно
вече
взема
ви
вие
винаги
все
всеки
всички
всичко
всяка
във
въпреки
върху
г
ги
главно
го
д
да
дали
до
докато
докога
дори
досега
доста
е
едва
един
ето
за
зад
заедно
заради
засега
затова
защо
защото
и
из
или
им
има
имат
иска
й
каза
как
каква
какво
както
какъв
като
кога
когато
което
които
кой
който
колко
която
къде
където
към
ли
м
ме
между
мен
ми
мнозина
мога
могат
може
моля
момента
му
н
на
над
назад
най
направи
напред
например
нас
не
него
нея
ни
ние
никой
нито
но
някои
някой
няма
обаче
около
освен
особено
от
отгоре
отново
още
пак
по
повече
повечето
под
поне
поради
после
почти
прави
пред
преди
през
при
пък
първо
с
са
само
се
сега
си
скоро
след
сме
според
сред
срещу
сте
съм
със
също
т
тази
така
такива
такъв
там
твой
те
тези
ти
тн
то
това
тогава
този
той
толкова
точно
трябва
тук
тъй
тя
тях
у
харесва
ч
че
често
чрез
ще
щом
я
`)
// TokenMapConstructor creates a token map and loads BulgarianStopWords into
// it. The map is returned together with any error reported by LoadBytes;
// config and cache are not consulted.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(BulgarianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor with the registry under StopName and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ca/articles_ca.go
================================================
package ca
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// ArticlesName is the name under which the Catalan articles token map is
// registered.
const ArticlesName = "articles_ca"
// CatalanArticles lists, one per line, the Catalan articles that
// ArticlesTokenMapConstructor loads into the token map.
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var CatalanArticles = []byte(`
d
l
m
n
s
t
`)
// ArticlesTokenMapConstructor creates a token map and loads CatalanArticles
// into it. The map is returned together with any error reported by LoadBytes;
// config and cache are not consulted.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	articles := analysis.NewTokenMap()
	if err := articles.LoadBytes(CatalanArticles); err != nil {
		return articles, err
	}
	return articles, nil
}
// init registers ArticlesTokenMapConstructor with the registry under
// ArticlesName and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ca/elision_ca.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ca
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
// ElisionName is the name under which the Catalan elision token filter is
// registered.
const ElisionName = "elision_ca"
// ElisionFilterConstructor builds the Catalan elision filter from the token
// map that cache holds under ArticlesName; config is not consulted.
//
// If the token map cannot be resolved, the lookup error is wrapped with %w so
// that the message is unchanged but callers can still inspect the underlying
// cause with errors.Is or errors.As.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
	if err != nil {
		return nil, fmt.Errorf("error building elision filter: %w", err)
	}
	return elision.NewElisionFilter(articlesTokenMap), nil
}
// init registers ElisionFilterConstructor with the registry under ElisionName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ca/elision_ca_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ca
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestFrenchElision checks that the filter registered as ElisionName strips
// the leading elided article (l', d') from each token.
//
// NOTE(review): the name says "French" but the package and the test data are
// Catalan; the name is kept so existing `go test -run` selections still match.
func TestFrenchElision(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("l'Institut"),
				},
				&analysis.Token{
					Term: []byte("d'Estudis"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("Institut"),
				},
				&analysis.Token{
					Term: []byte("Estudis"),
				},
			},
		},
	}

	// termsOf extracts the terms of a stream so a failure can show every
	// token, not only the first one.
	termsOf := func(stream analysis.TokenStream) []string {
		terms := make([]string, 0, len(stream))
		for _, tok := range stream {
			terms = append(terms, string(tok.Term))
		}
		return terms
	}

	cache := registry.NewCache()
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := elisionFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			// Indexing actual[0] here would panic on an empty result and
			// would hide a mismatch in any token after the first.
			t.Errorf("expected %q, got %q", termsOf(test.output), termsOf(actual))
		}
	}
}
================================================
FILE: analysis/lang/ca/stop_filter_ca.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ca
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds a stop-token filter backed by the token
// map registered as StopName. The config argument is not used.
//
// Any error from the cache lookup is returned unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers the Catalan stop-token filter under StopName and panics if
// the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ca/stop_words_ca.go
================================================
package ca
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which both the Catalan stop-word token map and
// the Catalan stop-token filter are registered.
const StopName = "stop_ca"
// CatalanStopWords holds the Catalan stop words, one per line, in the format
// consumed by TokenMap.LoadBytes. The first line of the data is a '#' header
// carried over from the upstream list.
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' so the list can be embedded in a Go raw string literal
var CatalanStopWords = []byte(`# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
a
abans
ací
ah
així
això
al
als
aleshores
algun
alguna
algunes
alguns
alhora
allà
allí
allò
altra
altre
altres
amb
ambdós
ambdues
apa
aquell
aquella
aquelles
aquells
aquest
aquesta
aquestes
aquests
aquí
baix
cada
cadascú
cadascuna
cadascunes
cadascuns
com
contra
d'un
d'una
d'unes
d'uns
dalt
de
del
dels
des
després
dins
dintre
donat
doncs
durant
e
eh
el
els
em
en
encara
ens
entre
érem
eren
éreu
es
és
esta
està
estàvem
estaven
estàveu
esteu
et
etc
ets
fins
fora
gairebé
ha
han
has
havia
he
hem
heu
hi
ho
i
igual
iguals
ja
l'hi
la
les
li
li'n
llavors
m'he
ma
mal
malgrat
mateix
mateixa
mateixes
mateixos
me
mentre
més
meu
meus
meva
meves
molt
molta
moltes
molts
mon
mons
n'he
n'hi
ne
ni
no
nogensmenys
només
nosaltres
nostra
nostre
nostres
o
oh
oi
on
pas
pel
pels
per
però
perquè
poc
poca
pocs
poques
potser
propi
qual
quals
quan
quant
que
què
quelcom
qui
quin
quina
quines
quins
s'ha
s'han
sa
semblant
semblants
ses
seu
seus
seva
seva
seves
si
sobre
sobretot
sóc
solament
sols
son
són
sons
sota
sou
t'ha
t'han
t'he
ta
tal
també
tampoc
tan
tant
tanta
tantes
teu
teus
teva
teves
ton
tons
tot
tota
totes
tots
un
una
unes
uns
us
va
vaig
vam
van
vas
veu
vosaltres
vostra
vostre
vostres
`)
// TokenMapConstructor builds a fresh token map and fills it from
// CatalanStopWords. The config and cache arguments are not used.
//
// The map is returned together with any error reported by LoadBytes.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(CatalanStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers the Catalan stop-word token map under StopName and panics
// if the registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/cjk/analyzer_cjk.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the CJK analyzer is registered.
const AnalyzerName = "cjk"

// AnalyzerConstructor assembles the CJK analyzer: the unicode tokenizer
// followed by the width, lowercase and bigram token filters, in that order.
// The config argument is not used.
//
// The first failing cache lookup aborts construction and its error is
// returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are looked up and applied in exactly this order.
	filterNames := [...]string{WidthName, lowercase.Name, BigramName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers the CJK analyzer under AnalyzerName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/cjk/analyzer_cjk_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestCJKAnalyzer runs the analyzer registered as AnalyzerName over a table
// of inputs and compares the produced token stream (term, type, position and
// byte offsets) with the expected one using reflect.DeepEqual.
func TestCJKAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// contiguous run of kana/kanji: overlapping bigrams
{
input: []byte("こんにちは世界"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は世"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
},
},
{
input: []byte("一二三四五六七八九十"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一二"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("二三"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("三四"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("四五"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("五六"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("六七"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
&analysis.Token{
Term: []byte("七八"),
Type: analysis.Double,
Position: 7,
Start: 18,
End: 24,
},
&analysis.Token{
Term: []byte("八九"),
Type: analysis.Double,
Position: 8,
Start: 21,
End: 27,
},
&analysis.Token{
Term: []byte("九十"),
Type: analysis.Double,
Position: 9,
Start: 24,
End: 30,
},
},
},
// spaces break adjacency: isolated characters come out as Single
{
input: []byte("一 二三四 五六七八九 十"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("二三"),
Type: analysis.Double,
Position: 2,
Start: 4,
End: 10,
},
&analysis.Token{
Term: []byte("三四"),
Type: analysis.Double,
Position: 3,
Start: 7,
End: 13,
},
&analysis.Token{
Term: []byte("五六"),
Type: analysis.Double,
Position: 4,
Start: 14,
End: 20,
},
&analysis.Token{
Term: []byte("六七"),
Type: analysis.Double,
Position: 5,
Start: 17,
End: 23,
},
&analysis.Token{
Term: []byte("七八"),
Type: analysis.Double,
Position: 6,
Start: 20,
End: 26,
},
&analysis.Token{
Term: []byte("八九"),
Type: analysis.Double,
Position: 7,
Start: 23,
End: 29,
},
&analysis.Token{
Term: []byte("十"),
Type: analysis.Single,
Position: 8,
Start: 30,
End: 33,
},
},
},
// plain ASCII words pass through as AlphaNumeric tokens
{
input: []byte("abc defgh ijklmn opqrstu vwxy z"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("abc"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("defgh"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 4,
End: 9,
},
&analysis.Token{
Term: []byte("ijklmn"),
Type: analysis.AlphaNumeric,
Position: 3,
Start: 10,
End: 16,
},
&analysis.Token{
Term: []byte("opqrstu"),
Type: analysis.AlphaNumeric,
Position: 4,
Start: 17,
End: 24,
},
&analysis.Token{
Term: []byte("vwxy"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 25,
End: 29,
},
&analysis.Token{
Term: []byte("z"),
Type: analysis.AlphaNumeric,
Position: 6,
Start: 30,
End: 31,
},
},
},
{
input: []byte("あい"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("あい "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("test"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 4,
},
},
},
{
input: []byte("test "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 4,
},
},
},
// mixed scripts without separating whitespace
{
input: []byte("あいtest"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 6,
End: 10,
},
},
},
{
input: []byte("testあい "),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.AlphaNumeric,
Position: 1,
Start: 0,
End: 4,
},
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 2,
Start: 4,
End: 10,
},
},
},
{
input: []byte("あいうえおabcかきくけこ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("いう"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("うえ"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("えお"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("abc"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("かき"),
Type: analysis.Double,
Position: 6,
Start: 18,
End: 24,
},
&analysis.Token{
Term: []byte("きく"),
Type: analysis.Double,
Position: 7,
Start: 21,
End: 27,
},
&analysis.Token{
Term: []byte("くけ"),
Type: analysis.Double,
Position: 8,
Start: 24,
End: 30,
},
&analysis.Token{
Term: []byte("けこ"),
Type: analysis.Double,
Position: 9,
Start: 27,
End: 33,
},
},
},
{
input: []byte("あいうえおabんcかきくけ こ"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("あい"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("いう"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("うえ"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("えお"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("ab"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 15,
End: 17,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 6,
Start: 17,
End: 20,
},
&analysis.Token{
Term: []byte("c"),
Type: analysis.AlphaNumeric,
Position: 7,
Start: 20,
End: 21,
},
&analysis.Token{
Term: []byte("かき"),
Type: analysis.Double,
Position: 8,
Start: 21,
End: 27,
},
&analysis.Token{
Term: []byte("きく"),
Type: analysis.Double,
Position: 9,
Start: 24,
End: 30,
},
&analysis.Token{
Term: []byte("くけ"),
Type: analysis.Double,
Position: 10,
Start: 27,
End: 33,
},
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 11,
Start: 34,
End: 37,
},
},
},
// CJK character followed by Arabic-script words
{
input: []byte("一 روبرت موير"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("روبرت"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 4,
End: 14,
},
&analysis.Token{
Term: []byte("موير"),
Type: analysis.AlphaNumeric,
Position: 3,
Start: 15,
End: 23,
},
},
},
{
input: []byte("一 رُوبرت موير"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("رُوبرت"),
Type: analysis.AlphaNumeric,
Position: 2,
Start: 4,
End: 16,
},
&analysis.Token{
Term: []byte("موير"),
Type: analysis.AlphaNumeric,
Position: 3,
Start: 17,
End: 25,
},
},
},
// first character is encoded in 4 bytes, so the first bigram ends at offset 7
{
input: []byte("𩬅艱鍟䇹愯瀛"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("𩬅艱"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 7,
},
&analysis.Token{
Term: []byte("艱鍟"),
Type: analysis.Double,
Position: 2,
Start: 4,
End: 10,
},
&analysis.Token{
Term: []byte("鍟䇹"),
Type: analysis.Double,
Position: 3,
Start: 7,
End: 13,
},
&analysis.Token{
Term: []byte("䇹愯"),
Type: analysis.Double,
Position: 4,
Start: 10,
End: 16,
},
&analysis.Token{
Term: []byte("愯瀛"),
Type: analysis.Double,
Position: 5,
Start: 13,
End: 19,
},
},
},
{
input: []byte("一"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
},
},
{
input: []byte("一丁丂"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("一丁"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("丁丂"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
},
},
}
cache := registry.NewCache()
for _, test := range tests {
// the analyzer is looked up from the same cache on every iteration
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}
// BenchmarkCJKAnalyzer measures the analyzer registered as AnalyzerName
// running over the bleveWikiArticleJapanese sample text.
func BenchmarkCJKAnalyzer(b *testing.B) {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		b.Fatal(err)
	}

	// Keep cache and analyzer construction out of the timed region so only
	// Analyze is measured.
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		analyzer.Analyze(bleveWikiArticleJapanese)
	}
}
// bleveWikiArticleJapanese is a multi-paragraph Japanese sample text used as
// the input of BenchmarkCJKAnalyzer.
var bleveWikiArticleJapanese = []byte(`加圧容器に貯蔵されている液体物質は、その時の気液平衡状態にあるが、火災により容器が加熱されていると容器内の液体は、その物質の大気圧のもとでの沸点より十分に高い温度まで加熱され、圧力も高くなる。この状態で容器が破裂すると容器内部の圧力は瞬間的に大気圧にまで低下する。
この時に容器内の平衡状態が破られ、液体は突沸し、気体になることで爆発現象を起こす。液化石油ガスなどでは、さらに拡散して空気と混ざったガスが自由空間蒸気雲爆発を起こす。液化石油ガスなどの常温常圧で気体になる物を高い圧力で液化して収納している容器、あるいは、そのような液体を輸送するためのパイプラインや配管などが火災などによって破壊されたときに起きる。
ブリーブという現象が明らかになったのは、フランス・リヨンの郊外にあるフェザンという町のフェザン製油所(ウニオン・ド・ゼネラル・ド・ペトロール)で大規模な爆発火災事故が発生したときだと言われている。
中身の液体が高温高圧の水である場合には「水蒸気爆発」と呼ばれる。`)
================================================
FILE: analysis/lang/cjk/cjk_bigram.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"bytes"
"container/ring"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// BigramName is the name under which the CJK bigram token filter is
// registered.
const BigramName = "cjk_bigram"

// CJKBigramFilter is a token filter that combines adjacent ideographic
// characters into two-character (bigram) tokens.
type CJKBigramFilter struct {
	// outputUnigram additionally emits each single character alongside the
	// bigrams when true.
	outputUnigram bool
}

// NewCJKBigramFilter returns a new filter; outputUnigram selects whether
// single-character tokens are emitted in addition to the bigrams.
func NewCJKBigramFilter(outputUnigram bool) *CJKBigramFilter {
	filter := new(CJKBigramFilter)
	filter.outputUnigram = outputUnigram
	return filter
}
// Filter rewrites the ideographic tokens of input as bigrams.
//
// Every token of type analysis.Ideographic is split into one token per rune.
// Consecutive runes whose byte offsets touch (the next Start equals the
// previous End) are joined into overlapping two-rune tokens of type
// analysis.Double. A rune with no adjacent neighbour is emitted as
// analysis.Single. When the filter was built with outputUnigram, every rune
// is also emitted as a Single token at the same position as the bigram that
// starts with it. Tokens of any other type are passed through with only
// their Position renumbered.
//
// A new token stream is returned; positions in it are renumbered from 1.
func (s *CJKBigramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	// r is a two-slot ring holding the previous and the current rune token.
	r := ring.New(2)
	itemsInRing := 0
	pos := 1
	outputPos := 1

	rv := make(analysis.TokenStream, 0, len(input))

	for _, tokout := range input {
		if tokout.Type == analysis.Ideographic {
			sofar := 0
			// bytes.Runes decodes every invalid byte as U+FFFD while
			// consuming a single byte, but utf8.RuneLen(U+FFFD) is 3.
			// Deriving the width from the rune value therefore drifts and
			// can slice past the end of Term; take the width from the
			// bytes that were actually consumed instead.
			for range bytes.Runes(tokout.Term) {
				_, rlen := utf8.DecodeRune(tokout.Term[sofar:])
				token := &analysis.Token{
					Term:     tokout.Term[sofar : sofar+rlen],
					Start:    tokout.Start + sofar,
					End:      tokout.Start + sofar + rlen,
					Position: pos,
					Type:     tokout.Type,
					KeyWord:  tokout.KeyWord,
				}
				pos++
				sofar += rlen
				if itemsInRing > 0 {
					// if items already buffered
					// check to see if this is aligned
					curr := r.Value.(*analysis.Token)
					if token.Start-curr.End != 0 {
						// not aligned flush
						flushToken := s.flush(r, &itemsInRing, outputPos)
						if flushToken != nil {
							outputPos++
							rv = append(rv, flushToken)
						}
					}
				}
				// now we can add this token to the buffer
				r = r.Next()
				r.Value = token
				if itemsInRing < 2 {
					itemsInRing++
				}
				builtUnigram := false
				if itemsInRing > 1 && s.outputUnigram {
					unigram := s.buildUnigram(r, &itemsInRing, outputPos)
					if unigram != nil {
						builtUnigram = true
						rv = append(rv, unigram)
					}
				}
				bigramToken := s.outputBigram(r, &itemsInRing, outputPos)
				if bigramToken != nil {
					rv = append(rv, bigramToken)
					outputPos++
				}

				// prev token should be removed if unigram was built
				if builtUnigram {
					itemsInRing--
				}
			}
		} else {
			// flush anything already buffered
			flushToken := s.flush(r, &itemsInRing, outputPos)
			if flushToken != nil {
				rv = append(rv, flushToken)
				outputPos++
			}
			// output this token as is
			tokout.Position = outputPos
			rv = append(rv, tokout)
			outputPos++
		}
	}

	// deal with possible trailing unigram
	if itemsInRing == 1 || s.outputUnigram {
		if itemsInRing == 2 {
			// buildUnigram reads the slot before r when two items are
			// buffered, so advance r to make it pick the newest token.
			r = r.Next()
		}
		unigram := s.buildUnigram(r, &itemsInRing, outputPos)
		if unigram != nil {
			rv = append(rv, unigram)
		}
	}
	return rv
}
// flush empties the buffer. If exactly one token is buffered it is returned
// as a Single token at position pos; otherwise nil is returned. In every
// case the current ring slot is cleared and *itemsInRing is reset to zero.
func (s *CJKBigramFilter) flush(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
	var pending *analysis.Token
	if buffered := *itemsInRing; buffered == 1 {
		pending = s.buildUnigram(r, itemsInRing, pos)
	}
	*itemsInRing = 0
	r.Value = nil
	return pending
}
// outputBigram builds a Double token from the two buffered tokens.
//
// r must point at the most recently added token; the slot before it holds
// the previous one. The result's Term is a freshly allocated concatenation
// of both terms, Start comes from the previous token and End from the
// current one. It returns nil unless *itemsInRing is exactly 2, and it never
// modifies *itemsInRing.
func (s *CJKBigramFilter) outputBigram(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
	if *itemsInRing != 2 {
		return nil
	}

	prev := r.Move(-1).Value.(*analysis.Token)
	curr := r.Value.(*analysis.Token)

	// The final length is known up front, so size the buffer once instead of
	// letting append grow it.
	shingledBytes := make([]byte, 0, len(prev.Term)+len(curr.Term))
	shingledBytes = append(shingledBytes, prev.Term...)
	shingledBytes = append(shingledBytes, curr.Term...)

	return &analysis.Token{
		Type:     analysis.Double,
		Term:     shingledBytes,
		Position: pos,
		Start:    prev.Start,
		End:      curr.End,
	}
}
// buildUnigram builds a Single token at position pos from a buffered token.
//
// With two buffered items the token in the slot before r is used; with one
// buffered item the token at r is used. Any other count yields nil. The
// returned token shares its Term slice with the buffered token, and
// *itemsInRing is only read, never modified.
func (s *CJKBigramFilter) buildUnigram(r *ring.Ring, itemsInRing *int, pos int) *analysis.Token {
	var slot *ring.Ring
	switch *itemsInRing {
	case 2:
		slot = r.Move(-1)
	case 1:
		slot = r
	default:
		return nil
	}

	source := slot.Value.(*analysis.Token)
	return &analysis.Token{
		Type:     analysis.Single,
		Term:     source.Term,
		Position: pos,
		Start:    source.Start,
		End:      source.End,
	}
}
// CJKBigramFilterConstructor builds a CJK bigram filter from config.
//
// The optional boolean key "output_unigram" enables unigram output; a
// missing key or a value of any other type leaves it disabled. The cache
// argument is not used and no error is ever returned.
func CJKBigramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	// A failed type assertion yields the zero value, false.
	outputUnigram, _ := config["output_unigram"].(bool)
	return NewCJKBigramFilter(outputUnigram), nil
}
// init registers the CJK bigram token filter under BigramName and panics if
// the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(BigramName, CJKBigramFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/cjk/cjk_bigram_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"container/ring"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// makeToken returns an Ideographic token with the given term, byte offsets
// and position, for use as ring content in the tests below.
func makeToken(term string, start, end, pos int) *analysis.Token {
	tok := new(analysis.Token)
	tok.Term = []byte(term)
	tok.Start = start
	tok.End = end
	// buildUnigram takes the output position from its 'pos' argument, not
	// from this field.
	tok.Position = pos
	tok.Type = analysis.Ideographic
	return tok
}
// TestCJKBigramFilter_buildUnigram exercises buildUnigram directly with
// hand-built rings and item counts, checking the returned token and that the
// item count passed in is left untouched.
func TestCJKBigramFilter_buildUnigram(t *testing.T) {
	filter := NewCJKBigramFilter(false)

	tests := []struct {
		name        string
		ringSetup   func() (*ring.Ring, int) // builds the ring and the itemsInRing value
		inputPos    int                      // position handed to buildUnigram
		expectToken *analysis.Token
	}{
		{
			name: "itemsInRing == 2",
			ringSetup: func() (*ring.Ring, int) {
				first := ring.New(2)
				first.Value = makeToken("一", 0, 3, 1) // original pos 1
				second := first.Next()
				second.Value = makeToken("二", 3, 6, 2) // original pos 2
				// the returned element holds "二"; the one before it holds "一"
				return second, 2
			},
			inputPos: 10,
			expectToken: &analysis.Token{
				Type:     analysis.Single,
				Term:     []byte("一"),
				Position: 10, // taken from inputPos
				Start:    0,
				End:      3,
			},
		},
		{
			name: "itemsInRing == 1 (ring points to the single item)",
			ringSetup: func() (*ring.Ring, int) {
				only := ring.New(2)
				only.Value = makeToken("三", 6, 9, 3)
				return only, 1
			},
			inputPos: 11,
			expectToken: &analysis.Token{
				Type:     analysis.Single,
				Term:     []byte("三"),
				Position: 11, // taken from inputPos
				Start:    6,
				End:      9,
			},
		},
		{
			name: "itemsInRing == 1 (ring points to nil, next is the single item)",
			ringSetup: func() (*ring.Ring, int) {
				// store the token in the second element and return that element
				second := ring.New(2).Next()
				second.Value = makeToken("四", 9, 12, 4)
				return second, 1
			},
			inputPos: 12,
			expectToken: &analysis.Token{
				Type:     analysis.Single,
				Term:     []byte("四"),
				Position: 12, // taken from inputPos
				Start:    9,
				End:      12,
			},
		},
		{
			name: "itemsInRing == 0",
			ringSetup: func() (*ring.Ring, int) {
				// nothing stored in the ring
				return ring.New(2), 0
			},
			inputPos:    13,
			expectToken: nil, // only counts of 1 or 2 produce a token
		},
		{
			name: "itemsInRing > 2 (should behave like 0)",
			ringSetup: func() (*ring.Ring, int) {
				first := ring.New(2)
				first.Value = makeToken("五", 12, 15, 5)
				second := first.Next()
				second.Value = makeToken("六", 15, 18, 6)
				// deliberately report an impossible item count
				return second, 3
			},
			inputPos:    14,
			expectToken: nil, // only counts of 1 or 2 produce a token
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			r, before := tc.ringSetup()
			after := before // buildUnigram receives a pointer to this copy

			got := filter.buildUnigram(r, &after, tc.inputPos)
			if !reflect.DeepEqual(got, tc.expectToken) {
				t.Errorf("buildUnigram() got = %v, want %v", got, tc.expectToken)
			}

			// buildUnigram must leave the item count alone
			if after != before {
				t.Errorf("buildUnigram() modified itemsInRing, got = %d, want %d", after, before)
			}
		})
	}
}
// TestCJKBigramFilter_outputBigram exercises outputBigram directly with
// hand-built rings and item counts, checking the returned token and that the
// item count passed in is left untouched.
func TestCJKBigramFilter_outputBigram(t *testing.T) {
	// the outputUnigram setting is irrelevant for outputBigram
	filter := NewCJKBigramFilter(false)

	tests := []struct {
		name        string
		ringSetup   func() (*ring.Ring, int) // builds the ring and the itemsInRing value
		inputPos    int                      // position handed to outputBigram
		expectToken *analysis.Token
	}{
		{
			name: "itemsInRing == 2",
			ringSetup: func() (*ring.Ring, int) {
				first := ring.New(2)
				first.Value = makeToken("一", 0, 3, 1) // original pos 1
				second := first.Next()
				second.Value = makeToken("二", 3, 6, 2) // original pos 2
				// the returned element holds "二"; the one before it holds "一"
				return second, 2
			},
			inputPos: 10,
			expectToken: &analysis.Token{
				Type:     analysis.Double,
				Term:     []byte("一二"), // both terms concatenated
				Position: 10,           // taken from inputPos
				Start:    0,            // start of the first token
				End:      6,            // end of the second token
			},
		},
		{
			name: "itemsInRing == 2 with different terms",
			ringSetup: func() (*ring.Ring, int) {
				first := ring.New(2)
				first.Value = makeToken("你好", 0, 6, 1)
				second := first.Next()
				second.Value = makeToken("世界", 6, 12, 2)
				return second, 2
			},
			inputPos: 5,
			expectToken: &analysis.Token{
				Type:     analysis.Double,
				Term:     []byte("你好世界"),
				Position: 5,
				Start:    0,
				End:      12,
			},
		},
		{
			name: "itemsInRing == 1",
			ringSetup: func() (*ring.Ring, int) {
				only := ring.New(2)
				only.Value = makeToken("三", 6, 9, 3)
				return only, 1
			},
			inputPos:    11,
			expectToken: nil, // only a count of 2 produces a token
		},
		{
			name: "itemsInRing == 0",
			ringSetup: func() (*ring.Ring, int) {
				// nothing stored in the ring
				return ring.New(2), 0
			},
			inputPos:    13,
			expectToken: nil, // only a count of 2 produces a token
		},
		{
			name: "itemsInRing > 2 (should behave like 0)",
			ringSetup: func() (*ring.Ring, int) {
				first := ring.New(2)
				first.Value = makeToken("五", 12, 15, 5)
				second := first.Next()
				second.Value = makeToken("六", 15, 18, 6)
				// deliberately report an impossible item count
				return second, 3
			},
			inputPos:    14,
			expectToken: nil, // only a count of 2 produces a token
		},
	}

	for _, tc := range tests {
		t.Run(tc.name, func(t *testing.T) {
			r, before := tc.ringSetup()
			after := before // outputBigram receives a pointer to this copy

			got := filter.outputBigram(r, &after, tc.inputPos)
			if !reflect.DeepEqual(got, tc.expectToken) {
				t.Errorf("outputBigram() got = %v, want %v", got, tc.expectToken)
			}

			// outputBigram must leave the item count alone
			if after != before {
				t.Errorf("outputBigram() modified itemsInRing, got = %d, want %d", after, before)
			}
		})
	}
}
func TestCJKBigramFilter(t *testing.T) {
tests := []struct {
outputUnigram bool
input analysis.TokenStream
output analysis.TokenStream
}{
// first test that non-adjacent terms are not combined
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 5,
End: 8,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 2,
Start: 5,
End: 8,
},
},
},
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 6,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は世"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
},
},
{
outputUnigram: true,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 6,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Single,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Single,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Single,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("は世"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Single,
Position: 6,
Start: 15,
End: 18,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 15,
End: 21,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Single,
Position: 7,
Start: 18,
End: 21,
},
},
},
{
// Assuming that `、` is removed by unicode tokenizer from `こんにちは、世界`
outputUnigram: true,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 8,
Start: 21,
End: 24,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Single,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Single,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Single,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Single,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Single,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Single,
Position: 6,
Start: 18,
End: 21,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 18,
End: 24,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Single,
Position: 7,
Start: 21,
End: 24,
},
},
},
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("こ"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 3,
},
&analysis.Token{
Term: []byte("ん"),
Type: analysis.Ideographic,
Position: 2,
Start: 3,
End: 6,
},
&analysis.Token{
Term: []byte("に"),
Type: analysis.Ideographic,
Position: 3,
Start: 6,
End: 9,
},
&analysis.Token{
Term: []byte("ち"),
Type: analysis.Ideographic,
Position: 4,
Start: 9,
End: 12,
},
&analysis.Token{
Term: []byte("は"),
Type: analysis.Ideographic,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("cat"),
Type: analysis.AlphaNumeric,
Position: 6,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世"),
Type: analysis.Ideographic,
Position: 7,
Start: 18,
End: 21,
},
&analysis.Token{
Term: []byte("界"),
Type: analysis.Ideographic,
Position: 8,
Start: 21,
End: 24,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("こん"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("んに"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("にち"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ちは"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("cat"),
Type: analysis.AlphaNumeric,
Position: 5,
Start: 12,
End: 15,
},
&analysis.Token{
Term: []byte("世界"),
Type: analysis.Double,
Position: 6,
Start: 18,
End: 24,
},
},
},
{
outputUnigram: false,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("パイプライン"),
Type: analysis.Ideographic,
Position: 1,
Start: 0,
End: 18,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("パイ"),
Type: analysis.Double,
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("イプ"),
Type: analysis.Double,
Position: 2,
Start: 3,
End: 9,
},
&analysis.Token{
Term: []byte("プラ"),
Type: analysis.Double,
Position: 3,
Start: 6,
End: 12,
},
&analysis.Token{
Term: []byte("ライ"),
Type: analysis.Double,
Position: 4,
Start: 9,
End: 15,
},
&analysis.Token{
Term: []byte("イン"),
Type: analysis.Double,
Position: 5,
Start: 12,
End: 18,
},
},
},
}
for _, test := range tests {
cjkBigramFilter := NewCJKBigramFilter(test.outputUnigram)
actual := cjkBigramFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
}
}
================================================
FILE: analysis/lang/cjk/cjk_width.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// WidthName is the name under which this package's init registers
// CJKWidthFilterConstructor as a token filter.
const WidthName = "cjk_width"
// CJKWidthFilter is a token filter whose Filter method rewrites fullwidth
// ASCII variants (U+FF01..U+FF5E) and halfwidth Katakana variants
// (U+FF65..U+FF9F) inside token terms. It has no fields and keeps no state.
type CJKWidthFilter struct{}
// NewCJKWidthFilter allocates a CJKWidthFilter and returns a pointer to it.
func NewCJKWidthFilter() *CJKWidthFilter {
	return new(CJKWidthFilter)
}
// Filter rewrites the Term of every token in input in place and returns the
// same stream.
//
// Runes in U+FF01..U+FF5E (fullwidth ASCII variants) are shifted down by
// 0xFEE0. Runes in U+FF65..U+FF9F (halfwidth Katakana variants) are replaced
// through kanaNorm, except that a sound mark (U+FF9E or U+FF9F) which combine
// manages to fold into the preceding rune is deleted instead.
func (s *CJKWidthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, tok := range input {
		n := utf8.RuneCount(tok.Term)
		chars := bytes.Runes(tok.Term)
		for idx := 0; idx < n; idx++ {
			c := chars[idx]
			switch {
			case c >= 0xFF01 && c <= 0xFF5E:
				// fullwidth ASCII variants
				chars[idx] -= 0xFEE0
			case c < 0xFF65 || c > 0xFF9F:
				// not a halfwidth Katakana variant: leave untouched
			case (c == 0xFF9E || c == 0xFF9F) && idx > 0 && combine(chars, idx, c):
				// the mark was merged into the previous rune, so drop it and
				// revisit this index, which now holds the following rune
				chars = analysis.DeleteRune(chars, idx)
				idx--
				n = len(chars)
			default:
				// halfwidth Katakana variants
				chars[idx] = kanaNorm[c-0xFF65]
			}
		}
		tok.Term = analysis.BuildTermFromRunes(chars)
	}
	return input
}
// kanaNorm holds the replacement rune for each halfwidth Katakana variant.
// Filter indexes it with ch-0xFF65 for ch in U+FF65..U+FF9F, so it has one
// entry per code point of that range (59 entries).
var kanaNorm = []rune{
0x30fb, 0x30f2, 0x30a1, 0x30a3, 0x30a5, 0x30a7, 0x30a9, 0x30e3, 0x30e5,
0x30e7, 0x30c3, 0x30fc, 0x30a2, 0x30a4, 0x30a6, 0x30a8, 0x30aa, 0x30ab,
0x30ad, 0x30af, 0x30b1, 0x30b3, 0x30b5, 0x30b7, 0x30b9, 0x30bb, 0x30bd,
0x30bf, 0x30c1, 0x30c4, 0x30c6, 0x30c8, 0x30ca, 0x30cb, 0x30cc, 0x30cd,
0x30ce, 0x30cf, 0x30d2, 0x30d5, 0x30d8, 0x30db, 0x30de, 0x30df, 0x30e0,
0x30e1, 0x30e2, 0x30e4, 0x30e6, 0x30e8, 0x30e9, 0x30ea, 0x30eb, 0x30ec,
0x30ed, 0x30ef, 0x30f3, 0x3099, 0x309A,
}
// kanaCombineVoiced is consulted by combine when the mark is not U+FF9F
// (Filter only passes U+FF9E or U+FF9F). It is indexed with prev-0x30A6 for a
// preceding rune prev in U+30A6..U+30FD (88 entries); each entry is the offset
// added to prev. An offset of 0 leaves prev unchanged, which combine reports
// as "not combined".
var kanaCombineVoiced = []rune{
78, 0, 0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 1,
0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1,
0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 8, 8, 8, 8, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
}
// kanaCombineHalfVoiced is consulted by combine when the mark is U+FF9F. It
// is indexed with prev-0x30A6 for a preceding rune prev in U+30A6..U+30FD
// (88 entries); each entry is the offset added to prev. An offset of 0 leaves
// prev unchanged, which combine reports as "not combined".
var kanaCombineHalfVoiced = []rune{
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 0, 0, 2, 0, 0, 2,
0, 0, 2, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
}
func combine(text []rune, pos int, r rune) bool {
prev := text[pos-1]
if prev >= 0x30A6 && prev <= 0x30FD {
if r == 0xFF9F {
text[pos-1] += kanaCombineHalfVoiced[prev-0x30A6]
} else {
text[pos-1] += kanaCombineVoiced[prev-0x30A6]
}
return text[pos-1] != prev
}
return false
}
// CJKWidthFilterConstructor is the registry constructor for the CJK width
// filter. It ignores both config and cache and always succeeds, returning a
// new CJKWidthFilter.
func CJKWidthFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewCJKWidthFilter()
	return filter, nil
}
// init registers CJKWidthFilterConstructor under WidthName and panics if the
// registry returns an error.
func init() {
	if err := registry.RegisterTokenFilter(WidthName, CJKWidthFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/cjk/cjk_width_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cjk
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestCJKWidthFilter checks that CJKWidthFilter folds fullwidth ASCII variants
// to ASCII and halfwidth Katakana variants (including a following voiced or
// semi-voiced sound mark) to regular Katakana.
//
// Inputs and Katakana outputs are written with \u escapes on purpose: the
// width variants are visually close to their normalized forms and are easily
// folded by editors or text tooling, which would turn every case into
// "input == output" and let the test pass without exercising the filter.
func TestCJKWidthFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			// fullwidth "Test" and "1234" (U+FF01..U+FF5E) fold to ASCII
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\uFF34\uFF45\uFF53\uFF54"),
				},
				&analysis.Token{
					Term: []byte("\uFF11\uFF12\uFF13\uFF14"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("Test"),
				},
				&analysis.Token{
					Term: []byte("1234"),
				},
			},
		},
		{
			// halfwidth KA TA KA NA -> U+30AB U+30BF U+30AB U+30CA
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\uFF76\uFF80\uFF76\uFF85"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u30AB\u30BF\u30AB\u30CA"),
				},
			},
		},
		{
			// halfwidth U + voiced mark (U+FF9E) combine into U+30F4,
			// followed by small I, small TU, TU
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\uFF73\uFF9E\uFF68\uFF6F\uFF82"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u30F4\u30A3\u30C3\u30C4"),
				},
			},
		},
		{
			// halfwidth HA + semi-voiced mark (U+FF9F) combine into U+30D1,
			// followed by NA, SO, NI, small TU, KU
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\uFF8A\uFF9F\uFF85\uFF7F\uFF86\uFF6F\uFF78"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u30D1\u30CA\u30BD\u30CB\u30C3\u30AF"),
				},
			},
		},
	}
	for _, test := range tests {
		cjkWidthFilter := NewCJKWidthFilter()
		actual := cjkWidthFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output, actual)
		}
	}
}
================================================
FILE: analysis/lang/ckb/analyzer_ckb.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
// AnalyzerName is the name under which this package's init registers
// AnalyzerConstructor.
const AnalyzerName = "ckb"
// AnalyzerConstructor assembles the "ckb" analyzer from components looked up
// in cache: the unicode tokenizer followed, in order, by the Sorani
// normalizer, the lowercase filter, the Sorani stop filter and the Sorani
// stemmer. config is not consulted. The first failed lookup aborts
// construction and its error is returned with a nil analyzer.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// filters are resolved in the same order in which they are applied
	filterNames := []string{NormalizeName, lowercase.Name, StopName, StemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry returns an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ckb/analyzer_ckb_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSoraniAnalyzer runs the registered "ckb" analyzer over short inputs and
// compares the resulting token stream with the expected one.
func TestSoraniAnalyzer(t *testing.T) {
	cases := []struct {
		text string
		want analysis.TokenStream
	}{
		// stop word removal
		{
			text: "ئەم پیاوە",
			want: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 2,
				Start:    7,
				End:      17,
			}},
		},
		{
			text: "پیاوە",
			want: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				Start:    0,
				End:      10,
			}},
		},
		{
			text: "پیاو",
			want: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				Start:    0,
				End:      8,
			}},
		},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for _, tc := range cases {
		got := analyzer.Analyze([]byte(tc.text))
		if !reflect.DeepEqual(got, tc.want) {
			t.Errorf("expected %v, got %v", tc.want, got)
		}
	}
}
================================================
FILE: analysis/lang/ckb/sorani_normalize.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"bytes"
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// NormalizeName is the name under which this package's init registers
// NormalizerFilterConstructor as a token filter.
const NormalizeName = "normalize_ckb"
// Code points that normalize treats specially. Each constant is named after
// the character or mark it holds; normalize decides whether a given one is
// replaced by another constant from this list or removed from the term.
const (
Yeh = '\u064A'
DotlessYeh = '\u0649'
FarsiYeh = '\u06CC'
Kaf = '\u0643'
Keheh = '\u06A9'
Heh = '\u0647'
Ae = '\u06D5'
Zwnj = '\u200C'
HehDoachashmee = '\u06BE'
TehMarbuta = '\u0629'
Reh = '\u0631'
Rreh = '\u0695'
RrehAbove = '\u0692'
Tatweel = '\u0640'
Fathatan = '\u064B'
Dammatan = '\u064C'
Kasratan = '\u064D'
Fatha = '\u064E'
Damma = '\u064F'
Kasra = '\u0650'
Shadda = '\u0651'
Sukun = '\u0652'
)
// SoraniNormalizeFilter is a token filter that replaces every token's term
// with the result of normalize. It has no fields and keeps no state.
type SoraniNormalizeFilter struct {
}
// NewSoraniNormalizeFilter allocates a SoraniNormalizeFilter and returns a
// pointer to it.
func NewSoraniNormalizeFilter() *SoraniNormalizeFilter {
	return new(SoraniNormalizeFilter)
}
// Filter normalizes the Term of every token in input in place and returns
// the same stream.
func (s *SoraniNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = normalize(input[i].Term)
	}
	return input
}
// normalize decodes input into runes, applies the Sorani normalization rules
// one rune at a time, and returns the re-encoded term.
//
// Rules, evaluated against the rune at the current position:
//   - Yeh and DotlessYeh become FarsiYeh; Kaf becomes Keheh.
//   - Zwnj is removed; if the rune now preceding it is Heh, that Heh becomes Ae.
//   - Heh in the last position of the current slice becomes Ae.
//   - TehMarbuta becomes Ae; HehDoachashmee becomes Heh.
//   - Reh in the first position becomes Rreh; RrehAbove always becomes Rreh.
//   - Tatweel and the listed diacritics are removed.
//   - Any other rune in the Unicode Cf category is removed.
func normalize(input []byte) []byte {
	runes := bytes.Runes(input)
	for pos := 0; pos < len(runes); {
		drop := false
		switch cur := runes[pos]; cur {
		case Yeh, DotlessYeh:
			runes[pos] = FarsiYeh
		case Kaf:
			runes[pos] = Keheh
		case Zwnj:
			if pos > 0 && runes[pos-1] == Heh {
				runes[pos-1] = Ae
			}
			drop = true
		case Heh:
			if pos == len(runes)-1 {
				runes[pos] = Ae
			}
		case TehMarbuta:
			runes[pos] = Ae
		case HehDoachashmee:
			runes[pos] = Heh
		case Reh:
			if pos == 0 {
				runes[pos] = Rreh
			}
		case RrehAbove:
			runes[pos] = Rreh
		case Tatweel, Kasratan, Dammatan, Fathatan, Fatha, Damma, Kasra, Shadda, Sukun:
			drop = true
		default:
			drop = unicode.In(cur, unicode.Cf)
		}
		if drop {
			// stay on the same index: it now holds the rune that followed
			runes = analysis.DeleteRune(runes, pos)
		} else {
			pos++
		}
	}
	return analysis.BuildTermFromRunes(runes)
}
// NormalizerFilterConstructor is the registry constructor for the Sorani
// normalize filter. It ignores both config and cache and always succeeds,
// returning a new SoraniNormalizeFilter.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewSoraniNormalizeFilter()
	return filter, nil
}
// init registers NormalizerFilterConstructor under NormalizeName and panics
// if the registry returns an error.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ckb/sorani_normalize_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestSoraniNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// test Y
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064A"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0649"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06CC"),
},
},
},
// test K
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0643"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06A9"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06A9"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06A9"),
},
},
},
// test H
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u200C"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06D5"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u200C\u06A9"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06D5\u06A9"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06BE"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0629"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u06D5"),
},
},
},
// test final H
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u0647\u0647"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0647\u0647\u06D5"),
},
},
},
// test RR
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0692"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0695"),
},
},
},
// test initial RR
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0631\u0631\u0631"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0695\u0631\u0631"),
},
},
},
// test remove
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0640"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064B"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064C"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064D"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064E"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u064F"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0650"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0651"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u0652"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("\u200C"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
soraniNormalizeFilter := NewSoraniNormalizeFilter()
for _, test := range tests {
actual := soraniNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/ckb/sorani_stemmer_filter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StemmerName is the name under which this package's init registers
// StemmerFilterConstructor as a token filter.
const StemmerName = "stemmer_ckb"
// SoraniStemmerFilter is a token filter that replaces the term of every token
// not flagged as KeyWord with the result of stem. It has no fields and keeps
// no state.
type SoraniStemmerFilter struct {
}
// NewSoraniStemmerFilter allocates a SoraniStemmerFilter and returns a
// pointer to it.
func NewSoraniStemmerFilter() *SoraniStemmerFilter {
	return new(SoraniStemmerFilter)
}
// Filter stems the Term of every token in input in place and returns the same
// stream. Tokens flagged as KeyWord are protected and left unchanged.
func (s *SoraniStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, tok := range input {
		if tok.KeyWord {
			// protected keyword: do not stem
			continue
		}
		tok.Term = stem(tok.Term)
	}
	return input
}
// stem strips Sorani suffixes from input and returns the result.
//
// It works in three stages, each guarded by a minimum length counted in
// runes: at most one postposition is removed, then at most one possessive
// pronoun, and finally the first matching suffix from an ordered list of
// ezafe, singular, plural and demonstrative endings. When nothing matches in
// any stage, input itself is returned.
func stem(input []byte) []byte {
	// endsWith tests the current value of input, including any truncation
	// applied by an earlier stage.
	endsWith := func(suffix string) bool {
		return bytes.HasSuffix(input, []byte(suffix))
	}
	inputLen := utf8.RuneCount(input)

	// postposition
	postposition := 0
	switch {
	case inputLen > 5 && endsWith("دا"):
		postposition = 2
	case inputLen > 4 && endsWith("نا"):
		postposition = 1
	case inputLen > 6 && endsWith("ەوە"):
		postposition = 3
	}
	if postposition > 0 {
		input = truncateRunes(input, postposition)
		inputLen = utf8.RuneCount(input)
	}

	// possessive pronoun
	if inputLen > 6 && (endsWith("مان") || endsWith("یان") || endsWith("تان")) {
		input = truncateRunes(input, 3)
		inputLen = utf8.RuneCount(input)
	}

	// the first matching case decides how many runes are removed
	switch {
	case inputLen > 6 && endsWith("ێکی"):
		// indefinite singular ezafe
		return truncateRunes(input, 3)
	case inputLen > 7 && endsWith("یەکی"):
		// indefinite singular ezafe
		return truncateRunes(input, 4)
	case inputLen > 5 && endsWith("ێک"):
		// indefinite singular
		return truncateRunes(input, 2)
	case inputLen > 6 && endsWith("یەک"):
		// indefinite singular
		return truncateRunes(input, 3)
	case inputLen > 6 && endsWith("ەکە"):
		// definite singular
		return truncateRunes(input, 3)
	case inputLen > 5 && endsWith("کە"):
		// definite singular
		return truncateRunes(input, 2)
	case inputLen > 7 && endsWith("ەکان"):
		// definite plural
		return truncateRunes(input, 4)
	case inputLen > 6 && endsWith("کان"):
		// definite plural
		return truncateRunes(input, 3)
	case inputLen > 7 && endsWith("یانی"):
		// indefinite plural ezafe
		return truncateRunes(input, 4)
	case inputLen > 6 && endsWith("انی"):
		// indefinite plural ezafe
		return truncateRunes(input, 3)
	case inputLen > 6 && endsWith("یان"):
		// indefinite plural
		return truncateRunes(input, 3)
	case inputLen > 5 && endsWith("ان"):
		// indefinite plural
		return truncateRunes(input, 2)
	case inputLen > 7 && endsWith("یانە"):
		// demonstrative plural
		return truncateRunes(input, 4)
	case inputLen > 6 && endsWith("انە"):
		// demonstrative plural
		return truncateRunes(input, 3)
	case inputLen > 5 && (endsWith("ایە") || endsWith("ەیە")):
		// demonstrative singular
		return truncateRunes(input, 2)
	case inputLen > 4 && endsWith("ە"):
		// demonstrative singular
		return truncateRunes(input, 1)
	case inputLen > 4 && endsWith("ی"):
		// absolute singular ezafe
		return truncateRunes(input, 1)
	}
	return input
}
// truncateRunes decodes input into runes, drops the last num of them and
// returns the re-encoded remainder as a new slice. It panics when num exceeds
// the number of runes in input.
func truncateRunes(input []byte, num int) []byte {
	decoded := bytes.Runes(input)
	keep := len(decoded) - num
	return buildTermFromRunes(decoded[:keep])
}
// buildTermFromRunes returns the UTF-8 encoding of runes in a newly allocated
// slice. The result is never nil, even for empty input.
//
// Each rune is encoded through a single stack buffer instead of a per-rune
// allocation. A rune that cannot be encoded (a surrogate half, a negative
// value or one above utf8.MaxRune) is written as U+FFFD by utf8.EncodeRune;
// sizing a slice with utf8.RuneLen, which reports -1 for such runes, would
// panic instead.
func buildTermFromRunes(runes []rune) []byte {
	rv := make([]byte, 0, len(runes)*utf8.UTFMax)
	var scratch [utf8.UTFMax]byte
	for _, r := range runes {
		n := utf8.EncodeRune(scratch[:], r)
		rv = append(rv, scratch[:n]...)
	}
	return rv
}
// StemmerFilterConstructor is the registry constructor for the Sorani stemmer
// filter. It ignores both config and cache and always succeeds, returning a
// new SoraniStemmerFilter.
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewSoraniStemmerFilter()
	return filter, nil
}
// init registers StemmerFilterConstructor under StemmerName and panics if the
// registry returns an error.
func init() {
	if err := registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ckb/sorani_stemmer_filter_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
)
// TestSoraniStemmerFilter runs whole inputs through a single-token tokenizer,
// the Sorani normalizer and the Sorani stemmer, and compares the resulting
// stream with the expected one. Every expected stream holds exactly one token
// at Position 1 starting at offset 0.
func TestSoraniStemmerFilter(t *testing.T) {
	// in order to match the lucene tests
	// we will test with an analyzer, not just the stemmer
	analyzer := analysis.DefaultAnalyzer{
		Tokenizer: single.NewSingleTokenTokenizer(),
		TokenFilters: []analysis.TokenFilter{
			NewSoraniNormalizeFilter(),
			NewSoraniStemmerFilter(),
		},
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{ // -ek
			input: []byte("پیاوێک"),
			output: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				End:      12,
			}},
		},
		{ // -yek
			input: []byte("دەرگایەک"),
			output: analysis.TokenStream{{
				Term:     []byte("دەرگا"),
				Position: 1,
				End:      16,
			}},
		},
		{ // -aka
			input: []byte("پیاوەكە"),
			output: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -ka
			input: []byte("دەرگاكە"),
			output: analysis.TokenStream{{
				Term:     []byte("دەرگا"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -a
			input: []byte("کتاویە"),
			output: analysis.TokenStream{{
				Term:     []byte("کتاوی"),
				Position: 1,
				End:      12,
			}},
		},
		{ // -ya
			input: []byte("دەرگایە"),
			output: analysis.TokenStream{{
				Term:     []byte("دەرگا"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -An
			input: []byte("پیاوان"),
			output: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				End:      12,
			}},
		},
		{ // -yAn
			input: []byte("دەرگایان"),
			output: analysis.TokenStream{{
				Term:     []byte("دەرگا"),
				Position: 1,
				End:      16,
			}},
		},
		{ // -akAn
			input: []byte("پیاوەکان"),
			output: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				End:      16,
			}},
		},
		{ // -kAn
			input: []byte("دەرگاکان"),
			output: analysis.TokenStream{{
				Term:     []byte("دەرگا"),
				Position: 1,
				End:      16,
			}},
		},
		{ // -Ana
			input: []byte("پیاوانە"),
			output: analysis.TokenStream{{
				Term:     []byte("پیاو"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -yAna
			input: []byte("دەرگایانە"),
			output: analysis.TokenStream{{
				Term:     []byte("دەرگا"),
				Position: 1,
				End:      18,
			}},
		},
		{ // Ezafe singular
			input: []byte("هۆتیلی"),
			output: analysis.TokenStream{{
				Term:     []byte("هۆتیل"),
				Position: 1,
				End:      12,
			}},
		},
		{ // Ezafe indefinite
			input: []byte("هۆتیلێکی"),
			output: analysis.TokenStream{{
				Term:     []byte("هۆتیل"),
				Position: 1,
				End:      16,
			}},
		},
		{ // Ezafe plural
			input: []byte("هۆتیلانی"),
			output: analysis.TokenStream{{
				Term:     []byte("هۆتیل"),
				Position: 1,
				End:      16,
			}},
		},
		{ // -awa
			input: []byte("دوورەوە"),
			output: analysis.TokenStream{{
				Term:     []byte("دوور"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -dA
			input: []byte("نیوەشەودا"),
			output: analysis.TokenStream{{
				Term:     []byte("نیوەشەو"),
				Position: 1,
				End:      18,
			}},
		},
		{ // -A
			input: []byte("سۆرانا"),
			output: analysis.TokenStream{{
				Term:     []byte("سۆران"),
				Position: 1,
				End:      12,
			}},
		},
		{ // -mAn
			input: []byte("پارەمان"),
			output: analysis.TokenStream{{
				Term:     []byte("پارە"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -tAn
			input: []byte("پارەتان"),
			output: analysis.TokenStream{{
				Term:     []byte("پارە"),
				Position: 1,
				End:      14,
			}},
		},
		{ // -yAn
			input: []byte("پارەیان"),
			output: analysis.TokenStream{{
				Term:     []byte("پارە"),
				Position: 1,
				End:      14,
			}},
		},
		{ // empty
			input: []byte(""),
			output: analysis.TokenStream{{
				Term:     []byte(""),
				Position: 1,
				End:      0,
			}},
		},
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("for input %s(% x)", tc.input, tc.input)
		t.Errorf("\texpected:")
		for _, tok := range tc.output {
			t.Errorf("\t\t%v %s(% x)", tok, tok.Term, tok.Term)
		}
		t.Errorf("\tactual:")
		for _, tok := range got {
			t.Errorf("\t\t%v %s(% x)", tok, tok.Term, tok.Term)
		}
	}
}
================================================
FILE: analysis/lang/ckb/stop_filter_ckb.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ckb
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Sorani stop token filter. It looks up
// the token map registered under StopName in cache and wraps it in a stop
// tokens filter; a failed lookup is returned as the error with a nil filter.
// config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry returns an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ckb/stop_words_ckb.go
================================================
package ckb
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name shared by the Sorani stop word token map
// (registered by this file's init) and the Sorani stop token filter, which
// is registered under the same name and looks the token map up by it.
const StopName = "stop_ckb"
// SoraniStopWords is the raw Sorani Kurdish stop word list, one entry per
// line. TokenMapConstructor hands these bytes to TokenMap.LoadBytes.
// NOTE(review): lines starting with '#' are presumably skipped as comments by
// LoadBytes; confirm against its implementation.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for a raw string literal.
var SoraniStopWords = []byte(`# set of kurdish stopwords
# note these have been normalized with our scheme (e represented with U+06D5, etc)
# constructed from:
# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
# and
و
# which
کە
# of
ی
# made/did
کرد
# that/which
ئەوەی
# on/head
سەر
# two
دوو
# also
هەروەها
# from/that
لەو
# makes/does
دەکات
# some
چەند
# every
هەر
# demonstratives
# that
ئەو
# this
ئەم
# personal pronouns
# I
من
# we
ئێمە
# you
تۆ
# you
ئێوە
# he/she/it
ئەو
# they
ئەوان
# prepositions
# to/with/by
بە
پێ
# without
بەبێ
# along with/while/during
بەدەم
# in the opinion of
بەلای
# according to
بەپێی
# before
بەرلە
# in the direction of
بەرەوی
# in front of/toward
بەرەوە
# before/in the face of
بەردەم
# without
بێ
# except for
بێجگە
# for
بۆ
# on/in
دە
تێ
# with
دەگەڵ
# after
دوای
# except for/aside from
جگە
# in/from
لە
لێ
# in front of/before/because of
لەبەر
# between/among
لەبەینی
# concerning/about
لەبابەت
# concerning
لەبارەی
# instead of
لەباتی
# beside
لەبن
# instead of
لەبرێتی
# behind
لەدەم
# with/together with
لەگەڵ
# by
لەلایەن
# within
لەناو
# between/among
لەنێو
# for the sake of
لەپێناوی
# with respect to
لەرەوی
# by means of/for
لەرێ
# for the sake of
لەرێگا
# on/on top of/according to
لەسەر
# under
لەژێر
# between/among
ناو
# between/among
نێوان
# after
پاش
# before
پێش
# like
وەک
`)
// TokenMapConstructor creates a fresh token map and loads SoraniStopWords
// into it. config and cache are not consulted. The map is returned together
// with whatever error LoadBytes reports.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(SoraniStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/cs/stop_filter_cs.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cs
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Czech stop token filter. It fetches
// the token map registered under StopName from cache and wraps it in a stop
// tokens filter. config is not consulted. A failed lookup is returned
// unchanged together with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/cs/stop_words_cs.go
================================================
package cs
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for the Czech stop word token map
// registered in this file.
const StopName = "stop_cs"
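// CzechStopWords is the Czech stop word list, one word per line.
// The content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be embedded in a Go raw string literal.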
var CzechStopWords = []byte(`a
s
k
o
i
u
v
z
dnes
cz
tímto
budeš
budem
byli
jseš
můj
svým
ta
tomto
tohle
tuto
tyto
jej
zda
proč
máte
tato
kam
tohoto
kdo
kteří
mi
nám
tom
tomuto
mít
nic
proto
kterou
byla
toho
protože
asi
ho
naši
napište
re
což
tím
takže
svých
její
svými
jste
aj
tu
tedy
teto
bylo
kde
ke
pravé
ji
nad
nejsou
či
pod
téma
mezi
přes
ty
pak
vám
ani
když
však
neg
jsem
tento
článku
články
aby
jsme
před
pta
jejich
byl
ještě
až
bez
také
pouze
první
vaše
která
nás
nový
tipy
pokud
může
strana
jeho
své
jiné
zprávy
nové
není
vás
jen
podle
zde
už
být
více
bude
již
než
který
by
které
co
nebo
ten
tak
má
při
od
po
jsou
jak
další
ale
si
se
ve
to
jako
za
zpět
ze
do
pro
je
na
atd
atp
jakmile
přičemž
já
on
ona
ono
oni
ony
my
vy
jí
ji
mě
mne
jemu
tomu
těm
těmu
němu
němuž
jehož
jíž
jelikož
jež
jakož
načež
`)
// TokenMapConstructor creates a fresh token map and loads CzechStopWords
// into it. config and cache are not consulted. The map is returned together
// with whatever error LoadBytes reports.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(CzechStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/da/analyzer_da.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
// AnalyzerName is the registry name under which the Danish analyzer is
// registered.
const AnalyzerName = "da"
// AnalyzerConstructor assembles the Danish analyzer from components held in
// cache: the unicode tokenizer followed by the lowercase, Danish stop word and
// Danish snowball stemmer token filters, in that order. config is not
// consulted. The first failed lookup aborts construction and its error is
// returned.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are applied.
	filterNames := []string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/da/analyzer_da_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestDanishAnalyzer runs the registered Danish analyzer over a few inputs and
// checks the produced token streams for stemming and stop word removal.
func TestDanishAnalyzer(t *testing.T) {
	// single builds the one-token stream expected for an input that yields a
	// single term at position 1 starting at byte offset 0.
	single := func(term string, end int) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{
				Term:     []byte(term),
				Position: 1,
				Start:    0,
				End:      end,
			},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("undersøg"), output: single("undersøg", 9)},
		{input: []byte("undersøgelse"), output: single("undersøg", 13)},
		// stop word
		{input: []byte("på"), output: analysis.TokenStream{}},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for i := range cases {
		got := analyzer.Analyze(cases[i].input)
		if reflect.DeepEqual(got, cases[i].output) {
			continue
		}
		t.Errorf("expected %v, got %v", cases[i].output, got)
	}
}
================================================
FILE: analysis/lang/da/stemmer_da.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/danish"
)
// SnowballStemmerName is the registry name under which the Danish snowball
// stemmer token filter is registered.
const SnowballStemmerName = "stemmer_da_snowball"
// DanishStemmerFilter is a token filter that stems Danish terms with the
// snowball Danish stemmer. It has no fields.
type DanishStemmerFilter struct {
}
// NewDanishStemmerFilter returns a newly allocated DanishStemmerFilter.
func NewDanishStemmerFilter() *DanishStemmerFilter {
	return new(DanishStemmerFilter)
}
// Filter replaces the Term of every token in input with the result of running
// the snowball Danish stemmer over it. Tokens are modified in place and the
// same stream is returned.
func (s *DanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		stemEnv := snowballstem.NewEnv(string(input[i].Term))
		danish.Stem(stemEnv)
		input[i].Term = []byte(stemEnv.Current())
	}
	return input
}
// DanishStemmerFilterConstructor is the registry constructor for the Danish
// snowball stemmer filter. config and cache are not consulted and the returned
// error is always nil.
func DanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewDanishStemmerFilter()
	return stemmer, nil
}
// init registers DanishStemmerFilterConstructor under SnowballStemmerName and
// panics if the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, DanishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/da/stop_filter_da.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Danish stop token filter. It fetches
// the token map registered under StopName from cache and wraps it in a stop
// tokens filter. config is not consulted. A failed lookup is returned
// unchanged together with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/da/stop_words_da.go
================================================
package da
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for the Danish stop word token map
// registered in this file.
const StopName = "stop_da"
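// DanishStopWords is the Danish stop word list in snowball stop word format:
// each stop word starts a line and '|' begins a comment.
// The content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be embedded in a Go raw string literal.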
var DanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Danish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
og | and
i | in
jeg | I
det | that (dem. pronoun)/it (pers. pronoun)
at | that (in front of a sentence)/to (with infinitive)
en | a/an
den | it (pers. pronoun)/that (dem. pronoun)
til | to/at/for/until/against/by/of/into, more
er | present tense of "to be"
som | who, as
på | on/upon/in/on/at/to/after/of/with/for, on
de | they
med | with/by/in, along
han | he
af | of/by/from/off/for/in/with/on, off
for | at/for/to/from/by/of/ago, in front/before, because
ikke | not
der | who/which, there/those
var | past tense of "to be"
mig | me/myself
sig | oneself/himself/herself/itself/themselves
men | but
et | a/an/one, one (number), someone/somebody/one
har | present tense of "to have"
om | round/about/for/in/a, about/around/down, if
vi | we
min | my
havde | past tense of "to have"
ham | him
hun | she
nu | now
over | over/above/across/by/beyond/past/on/about, over/past
da | then, when/as/since
fra | from/off/since, off, since
du | you
ud | out
sin | his/her/its/one's
dem | them
os | us/ourselves
op | up
man | you/one
hans | his
hvor | where
eller | or
hvad | what
skal | must/shall etc.
selv | myself/youself/herself/ourselves etc., even
her | here
alle | all/everyone/everybody etc.
vil | will (verb)
blev | past tense of "to stay/to remain/to get/to become"
kunne | could
ind | in
når | when
være | present tense of "to be"
dog | however/yet/after all
noget | something
ville | would
jo | you know/you see (adv), yes
deres | their/theirs
efter | after/behind/according to/for/by/from, later/afterwards
ned | down
skulle | should
denne | this
end | than
dette | this
mit | my/mine
også | also
under | under/beneath/below/during, below/underneath
have | have
dig | you
anden | other
hende | her
mine | my
alt | everything
meget | much/very, plenty of
sit | his, her, its, one's
sine | his, her, its, one's
vor | our
mod | against
disse | these
hvis | if
din | your/yours
nogle | some
hos | by/at
blive | be/become
mange | many
ad | by/through
bliver | present tense of "to be/to become"
hendes | her/hers
været | be
thi | for (conj)
jer | you
sådan | such, like this/like that
`)
// TokenMapConstructor creates a fresh token map and loads DanishStopWords
// into it. config and cache are not consulted. The map is returned together
// with whatever error LoadBytes reports.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(DanishStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/de/analyzer_de.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
// AnalyzerName is the registry name under which the German analyzer is
// registered.
const AnalyzerName = "de"
// AnalyzerConstructor assembles the German analyzer from components held in
// cache: the unicode tokenizer followed by the lowercase, German stop word,
// German normalization and German light stemmer token filters, in that order.
// config is not consulted. The first failed lookup aborts construction and its
// error is returned.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are applied.
	filterNames := []string{lowercase.Name, StopName, NormalizeName, LightStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/de/analyzer_de_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestGermanAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
input: []byte("Tisch"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("tisch"),
Position: 1,
Start: 0,
End: 5,
},
},
},
{
input: []byte("Tische"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("tisch"),
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("Tischen"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("tisch"),
Position: 1,
Start: 0,
End: 7,
},
},
},
// german specials
{
input: []byte("Schaltflächen"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("schaltflach"),
Position: 1,
Start: 0,
End: 14,
},
},
},
{
input: []byte("Schaltflaechen"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("schaltflach"),
Position: 1,
Start: 0,
End: 14,
},
},
},
// tests added by marty to increase coverage
{
input: []byte("Blechern"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("blech"),
Position: 1,
Start: 0,
End: 8,
},
},
},
{
input: []byte("Klecks"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("kleck"),
Position: 1,
Start: 0,
End: 6,
},
},
},
{
input: []byte("Mindestens"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("mindest"),
Position: 1,
Start: 0,
End: 10,
},
},
},
{
input: []byte("Kugelfest"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("kugelf"),
Position: 1,
Start: 0,
End: 9,
},
},
},
{
input: []byte("Baldigst"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("baldig"),
Position: 1,
Start: 0,
End: 8,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}
================================================
FILE: analysis/lang/de/german_normalize.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// NormalizeName is the registry name under which the German normalization
// token filter is registered.
const NormalizeName = "normalize_de"
// Scanner states used by normalize while walking the runes of a term.
const (
N = 0 /* ordinary state */
V = 1 /* stops 'u' from entering umlaut state */
U = 2 /* umlaut state, allows e-deletion */
)
// GermanNormalizeFilter is a token filter that rewrites each token's term
// with normalize. It has no fields.
type GermanNormalizeFilter struct {
}
// NewGermanNormalizeFilter returns a newly allocated GermanNormalizeFilter.
func NewGermanNormalizeFilter() *GermanNormalizeFilter {
	return new(GermanNormalizeFilter)
}
// Filter replaces the Term of every token in input with its normalized form.
// Tokens are modified in place and the same stream is returned.
func (s *GermanNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = normalize(input[i].Term)
	}
	return input
}
// normalize folds German spelling variants in a single term:
//   - 'ä', 'ö', 'ü' become 'a', 'o', 'u';
//   - 'ß' becomes "ss";
//   - an 'e' that directly follows 'a', 'o', or a 'u' seen in the ordinary
//     state is deleted, so "ae"/"oe"/"ue" collapse to the base vowel.
//
// The scanner tracks one of the states N, V, U. A 'u' only enters the umlaut
// state U when the previous state was N, which keeps "ue" intact after a
// vowel or 'q' (for example "dauer").
func normalize(input []byte) []byte {
	prev := N
	rs := bytes.Runes(input)
	for pos := 0; pos < len(rs); pos++ {
		// Any rune not handled below resets the scanner to the ordinary state.
		next := N
		switch rs[pos] {
		case 'a', 'o':
			next = U
		case 'u':
			next = V
			if prev == N {
				next = U
			}
		case 'e':
			next = V
			if prev == U {
				// Drop the 'e' and step back so the loop increment lands on
				// the rune that moved into this slot.
				rs = analysis.DeleteRune(rs, pos)
				pos--
			}
		case 'i', 'q', 'y':
			next = V
		case 'ä':
			rs[pos] = 'a'
			next = V
		case 'ö':
			rs[pos] = 'o'
			next = V
		case 'ü':
			rs[pos] = 'u'
			next = V
		case 'ß':
			// Expand to "ss": overwrite in place, then insert a second 's'
			// right after it and continue scanning past both.
			rs[pos] = 's'
			pos++
			rs = analysis.InsertRune(rs, pos, 's')
		}
		prev = next
	}
	return analysis.BuildTermFromRunes(rs)
}
// NormalizerFilterConstructor is the registry constructor for the German
// normalization filter. config and cache are not consulted and the returned
// error is always nil.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	normalizer := NewGermanNormalizeFilter()
	return normalizer, nil
}
// init registers NormalizerFilterConstructor under NormalizeName and panics if
// the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/de/german_normalize_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestGermanNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// Tests that a/o/u + e is equivalent to the umlaut form
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflächen"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflachen"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflaechen"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Schaltflachen"),
},
},
},
// Tests the specific heuristic that ue is not folded after a vowel or q.
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("dauer"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("dauer"),
},
},
},
// Tests german specific folding of sharp-s
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("weißbier"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("weissbier"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
germanNormalizeFilter := NewGermanNormalizeFilter()
for _, test := range tests {
actual := germanNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected %s(% x), got %s(% x)", test.output[0].Term, test.output[0].Term, actual[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/de/light_stemmer_de.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// LightStemmerName is the registry name under which the German light stemmer
// token filter is registered.
const LightStemmerName = "stemmer_de_light"
// GermanLightStemmerFilter is a token filter that applies stem to each
// token's term. It has no fields.
type GermanLightStemmerFilter struct {
}
// NewGermanLightStemmerFilter returns a newly allocated
// GermanLightStemmerFilter.
func NewGermanLightStemmerFilter() *GermanLightStemmerFilter {
	return new(GermanLightStemmerFilter)
}
// Filter decodes each token's Term into runes, stems it with stem, and stores
// the re-encoded result back on the token. Tokens are modified in place and
// the same stream is returned.
func (s *GermanLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		stemmed := stem(bytes.Runes(input[i].Term))
		input[i].Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
func stem(input []rune) []rune {
for i, r := range input {
switch r {
case 'ä', 'à', 'á', 'â':
input[i] = 'a'
case 'ö', 'ò', 'ó', 'ô':
input[i] = 'o'
case 'ï', 'ì', 'í', 'î':
input[i] = 'i'
case 'ü', 'ù', 'ú', 'û':
input[i] = 'u'
}
}
input = step1(input)
return step2(input)
}
// stEnding reports whether ch is one of the consonants
// b, d, f, g, h, k, l, m, n, t.
func stEnding(ch rune) bool {
	for _, consonant := range "bdfghklmnt" {
		if consonant == ch {
			return true
		}
	}
	return false
}
// step1 removes at most one suffix from s and returns the shortened prefix of
// the same slice. The rules are tried in order:
//   - "ern" when s has more than 5 runes;
//   - 'e' followed by m, n, r or s when s has more than 4 runes;
//   - a final 'e' when s has more than 3 runes;
//   - a final 's' preceded by an stEnding consonant when s has more than 3 runes.
//
// If no rule matches, s is returned unchanged.
func step1(s []rune) []rune {
	n := len(s)
	switch {
	case n > 5 && s[n-3] == 'e' && s[n-2] == 'r' && s[n-1] == 'n':
		return s[:n-3]
	case n > 4 && s[n-2] == 'e' &&
		(s[n-1] == 'm' || s[n-1] == 'n' || s[n-1] == 'r' || s[n-1] == 's'):
		return s[:n-2]
	case n > 3 && s[n-1] == 'e':
		return s[:n-1]
	case n > 3 && s[n-1] == 's' && stEnding(s[n-2]):
		return s[:n-1]
	}
	return s
}
// step2 removes at most one suffix from s and returns the shortened prefix of
// the same slice. The rules are tried in order:
//   - "est" when s has more than 5 runes;
//   - "er" or "en" when s has more than 4 runes;
//   - "st" preceded by an stEnding consonant when s has more than 4 runes.
//
// If no rule matches, s is returned unchanged.
func step2(s []rune) []rune {
	n := len(s)
	switch {
	case n > 5 && s[n-3] == 'e' && s[n-2] == 's' && s[n-1] == 't':
		return s[:n-3]
	case n > 4 && s[n-2] == 'e' && (s[n-1] == 'r' || s[n-1] == 'n'):
		return s[:n-2]
	case n > 4 && s[n-2] == 's' && s[n-1] == 't' && stEnding(s[n-3]):
		return s[:n-2]
	}
	return s
}
// GermanLightStemmerFilterConstructor is the registry constructor for the
// German light stemmer filter. config and cache are not consulted and the
// returned error is always nil.
func GermanLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewGermanLightStemmerFilter()
	return stemmer, nil
}
// init registers GermanLightStemmerFilterConstructor under LightStemmerName
// and panics if the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(LightStemmerName, GermanLightStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/de/stemmer_de_snowball.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/german"
)
// SnowballStemmerName is the registry name under which the German snowball
// stemmer token filter is registered.
const SnowballStemmerName = "stemmer_de_snowball"
// GermanStemmerFilter is a token filter that stems German terms with the
// snowball German stemmer. It has no fields.
type GermanStemmerFilter struct {
}
// NewGermanStemmerFilter returns a newly allocated GermanStemmerFilter.
func NewGermanStemmerFilter() *GermanStemmerFilter {
	return new(GermanStemmerFilter)
}
// Filter replaces the Term of every token in input with the result of running
// the snowball German stemmer over it. Tokens are modified in place and the
// same stream is returned.
func (s *GermanStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		stemEnv := snowballstem.NewEnv(string(input[i].Term))
		german.Stem(stemEnv)
		input[i].Term = []byte(stemEnv.Current())
	}
	return input
}
// GermanStemmerFilterConstructor is the registry constructor for the German
// snowball stemmer filter. config and cache are not consulted and the returned
// error is always nil.
func GermanStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewGermanStemmerFilter()
	return stemmer, nil
}
// init registers GermanStemmerFilterConstructor under SnowballStemmerName and
// panics if the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, GermanStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/de/stemmer_de_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestSnowballGermanStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuschrecken"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuschreck"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuwarten"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("abzuwart"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("zwirnfabrik"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("zwirnfabr"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("zyniker"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("zynik"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(SnowballStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/de/stop_filter_de.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the German stop token filter. It fetches
// the token map registered under StopName from cache and wraps it in a stop
// tokens filter. config is not consulted. A failed lookup is returned
// unchanged together with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/de/stop_words_de.go
================================================
package de
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for the German stop words.
const StopName = "stop_de"
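// GermanStopWords is the German stop word list in snowball stop word format:
// each stop word starts a line and '|' begins a comment.
// The content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be embedded in a Go raw string literal.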
var GermanStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A German stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The number of forms in this list is reduced significantly by passing it
| through the German stemmer.
aber | but
alle | all
allem
allen
aller
alles
als | than, as
also | so
am | an + dem
an | at
ander | other
andere
anderem
anderen
anderer
anderes
anderm
andern
anderr
anders
auch | also
auf | on
aus | out of
bei | by
bin | am
bis | until
bist | art
da | there
damit | with it
dann | then
der | the
den
des
dem
die
das
daß | that
derselbe | the same
derselben
denselben
desselben
demselben
dieselbe
dieselben
dasselbe
dazu | to that
dein | thy
deine
deinem
deinen
deiner
deines
denn | because
derer | of those
dessen | of him
dich | thee
dir | to thee
du | thou
dies | this
diese
diesem
diesen
dieser
dieses
doch | (several meanings)
dort | (over) there
durch | through
ein | a
eine
einem
einen
einer
eines
einig | some
einige
einigem
einigen
einiger
einiges
einmal | once
er | he
ihn | him
ihm | to him
es | it
etwas | something
euer | your
eure
eurem
euren
eurer
eures
für | for
gegen | towards
gewesen | p.p. of sein
hab | have
habe | have
haben | have
hat | has
hatte | had
hatten | had
hier | here
hin | there
hinter | behind
ich | I
mich | me
mir | to me
ihr | you, to her
ihre
ihrem
ihren
ihrer
ihres
euch | to you
im | in + dem
in | in
indem | while
ins | in + das
ist | is
jede | each, every
jedem
jeden
jeder
jedes
jene | that
jenem
jenen
jener
jenes
jetzt | now
kann | can
kein | no
keine
keinem
keinen
keiner
keines
können | can
könnte | could
machen | do
man | one
manche | some, many a
manchem
manchen
mancher
manches
mein | my
meine
meinem
meinen
meiner
meines
mit | with
muss | must
musste | had to
nach | to(wards)
nicht | not
nichts | nothing
noch | still, yet
nun | now
nur | only
ob | whether
oder | or
ohne | without
sehr | very
sein | his
seine
seinem
seinen
seiner
seines
selbst | self
sich | herself
sie | they, she
ihnen | to them
sind | are
so | so
solche | such
solchem
solchen
solcher
solches
soll | shall
sollte | should
sondern | but
sonst | else
über | over
um | about, around
und | and
uns | us
unse
unsem
unsen
unser
unses
unter | under
viel | much
vom | von + dem
von | from
vor | before
während | while
war | was
waren | were
warst | wast
was | what
weg | away, off
weil | because
weiter | further
welche | which
welchem
welchen
welcher
welches
wenn | when
werde | will
werden | will
wie | how
wieder | again
will | want
wir | we
wird | will
wirst | willst
wo | where
wollen | want
wollte | wanted
würde | would
würden | would
zu | to
zum | zu + dem
zur | zu + der
zwar | indeed
zwischen | between
`)
// TokenMapConstructor creates a new token map and loads GermanStopWords
// into it with LoadBytes.
//
// The config and cache arguments are not consulted. The map is returned
// together with whatever error LoadBytes produced, so callers must check
// the error before relying on the map.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(GermanStopWords); loadErr != nil {
		// The map is still handed back alongside the error.
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/el/stop_filter_el.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package el
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Greek stop word token filter.
//
// It fetches the token map registered under StopName from cache and wraps
// it with stop.NewStopTokensFilter. The config argument is not consulted.
// Any error from the token map lookup is returned unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if
// the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/el/stop_words_el.go
================================================
package el
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for both the Greek stop word token
// map and the Greek stop token filter.
const StopName = "stop_el"
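// GreekStopWords is the stop word list loaded by TokenMapConstructor.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Backticks (`) were changed to apostrophes (') so that the list can be
// embedded in a raw string literal.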
var GreekStopWords = []byte(`# Lucene Greek Stopwords list
# Note: by default this file is used after GreekLowerCaseFilter,
# so when modifying this file use 'σ' instead of 'ς'
ο
η
το
οι
τα
του
τησ
των
τον
την
και
κι
κ
ειμαι
εισαι
ειναι
ειμαστε
ειστε
στο
στον
στη
στην
μα
αλλα
απο
για
προσ
με
σε
ωσ
παρα
αντι
κατα
μετα
θα
να
δε
δεν
μη
μην
επι
ενω
εαν
αν
τοτε
που
πωσ
ποιοσ
ποια
ποιο
ποιοι
ποιεσ
ποιων
ποιουσ
αυτοσ
αυτη
αυτο
αυτοι
αυτων
αυτουσ
αυτεσ
αυτα
εκεινοσ
εκεινη
εκεινο
εκεινοι
εκεινεσ
εκεινα
εκεινων
εκεινουσ
οπωσ
ομωσ
ισωσ
οσο
οτι
`)
// TokenMapConstructor creates a new token map and loads GreekStopWords
// into it with LoadBytes.
//
// The config and cache arguments are not consulted. The map is returned
// together with whatever error LoadBytes produced, so callers must check
// the error before relying on the map.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(GreekStopWords); loadErr != nil {
		// The map is still handed back alongside the error.
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/en/analyzer_en.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package en implements an analyzer with reasonable defaults for processing
// English text.
//
// It strips possessive suffixes ('s), transforms tokens to lower case,
// removes stopwords from a built-in list, and applies porter stemming.
//
// The built-in stopwords list is defined in EnglishStopWords.
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/porter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the English analyzer is registered.
const AnalyzerName = "en"
// AnalyzerConstructor assembles the English analyzer from components held
// in cache.
//
// The tokenizer is the one named unicode.Name. The token filters are looked
// up, and configured, in this order: PossessiveName, lowercase.Name,
// StopName, porter.Name. The first lookup that fails aborts construction
// and its error is returned. The config argument is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Lookup order equals pipeline order.
	filterNames := []string{PossessiveName, lowercase.Name, StopName, porter.Name}
	pipeline := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		pipeline = append(pipeline, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: pipeline,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/en/analyzer_en_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestEnglishAnalyzer runs the registered English analyzer over a handful
// of inputs covering stemming, stop word removal and possessive removal,
// and compares the produced token streams with the expected ones.
func TestEnglishAnalyzer(t *testing.T) {
	// single builds a one-token stream; every expected token in this test
	// sits at position 1 and starts at byte offset 0.
	single := func(term string, end int) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{
				Term:     []byte(term),
				Position: 1,
				Start:    0,
				End:      end,
			},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("books"), output: single("book", 5)},
		{input: []byte("book"), output: single("book", 4)},
		// stop word removal
		{input: []byte("the"), output: analysis.TokenStream{}},
		// possessive removal (End is a byte offset, so the three-byte
		// apostrophe variants end at 10)
		{input: []byte("steven's"), output: single("steven", 8)},
		{input: []byte("steven\u2019s"), output: single("steven", 10)},
		{input: []byte("steven\uFF07s"), output: single("steven", 10)},
	}

	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}
	for i := range cases {
		got := analyzer.Analyze(cases[i].input)
		if !reflect.DeepEqual(got, cases[i].output) {
			t.Errorf("expected %v, got %v", cases[i].output, got)
		}
	}
}
================================================
FILE: analysis/lang/en/plural_stemmer.go
================================================
/*
This code was ported from the Open Search Project
https://github.com/opensearch-project/OpenSearch/blob/main/modules/analysis-common/src/main/java/org/opensearch/analysis/common/EnglishPluralStemFilter.java
The algorithm itself was created by Mark Harwood
https://github.com/markharwood
*/
/*
* SPDX-License-Identifier: Apache-2.0
*
* The OpenSearch Contributors require contributions made to
* this file be licensed under the Apache-2.0 license or a
* compatible open source license.
*/
/*
* Licensed to Elasticsearch under one or more contributor
* license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright
* ownership. Elasticsearch licenses this file to you under
* the Apache License, Version 2.0 (the "License"); you may
* not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing,
* software distributed under the License is distributed on an
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
* KIND, either express or implied. See the License for the
* specific language governing permissions and limitations
* under the License.
*/
package en
import (
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// PluralStemmerName is the name under which the English plural stemmer
// token filter is registered.
const PluralStemmerName = "stemmer_en_plural"
// EnglishPluralStemmerFilter is a stateless token filter that reduces
// English plural forms to their singular stem.
type EnglishPluralStemmerFilter struct{}

// NewEnglishPluralStemmerFilter returns a new EnglishPluralStemmerFilter.
func NewEnglishPluralStemmerFilter() *EnglishPluralStemmerFilter {
	return new(EnglishPluralStemmerFilter)
}
// Filter replaces the term of every token in input with the result of
// stem and returns the same stream. Tokens are modified in place; note
// that stem also lower-cases the term.
func (s *EnglishPluralStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		singular := stem(string(tok.Term))
		tok.Term = []byte(singular)
	}
	return input
}
// EnglishPluralStemmerFilterConstructor is the registry constructor for the
// English plural stemmer. It ignores config and cache and never fails.
func EnglishPluralStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewEnglishPluralStemmerFilter()
	return filter, nil
}
// init registers EnglishPluralStemmerFilterConstructor under
// PluralStemmerName and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(PluralStemmerName, EnglishPluralStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
// ----------------------------------------------------------------------------
var (
	// oesExceptions lists plurals ending in "oes" whose stem keeps the
	// "e": only the final "s" is removed.
	oesExceptions = []string{
		"shoes",
		"canoes",
		"oboes",
	}

	// chesExceptions lists plurals ending in "ches" whose stem keeps the
	// "e": only the final "s" is removed.
	chesExceptions = []string{
		"cliches", "avalanches",
		"mustaches", "moustaches",
		"quiches",
		"headaches", "heartaches",
		"porsches", "tranches", "caches",
	}
)
// stem lower-cases word and strips an English plural suffix from it.
//
// Rules, applied to the lower-cased word:
//   - fewer than three runes, or not ending in "s": returned as is
//   - ending in "us" or "ss": returned as is
//   - ending in "ies" (more than four runes): "ies" becomes "y"
//   - ending in "xes" (more than four runes): "es" is removed
//   - ending in "oes" (more than three runes): "es" is removed, unless the
//     word matches oesExceptions, in which case only "s" is removed
//   - ending in "shes" or "sses": "es" is removed
//   - ending in "ches": "es" is removed, unless the word matches
//     chesExceptions, in which case only "s" is removed
//   - anything else ending in "s": the "s" is removed
func stem(word string) string {
	runes := []rune(strings.ToLower(word))
	n := len(runes)
	if n < 3 || runes[n-1] != 's' {
		return string(runes)
	}

	penultimate := runes[n-2]
	if penultimate == 'u' || penultimate == 's' {
		return string(runes)
	}
	if penultimate != 'e' {
		return string(runes[:n-1])
	}

	// From here on the word ends in "es"; n >= 3 so runes[n-3] exists.
	third := runes[n-3]
	switch {
	case n > 4 && third == 'i':
		// Only for more than four runes, so "spies" -> "spy" but
		// "pies" -> "pie".
		runes[n-3] = 'y'
		return string(runes[:n-2])
	case n > 4 && third == 'x':
		// Needs a prefix longer than one rune: "boxes" -> "box" but
		// "axes" -> "axe".
		return string(runes[:n-2])
	case n > 3 && third == 'o':
		if isException(runes, oesExceptions) {
			return string(runes[:n-1])
		}
		return string(runes[:n-2])
	case n > 4 && runes[n-4] == 's' && (third == 'h' || third == 's'):
		return string(runes[:n-2])
	case n > 4 && runes[n-4] == 'c' && third == 'h':
		if isException(runes, chesExceptions) {
			return string(runes[:n-1])
		}
		return string(runes[:n-2])
	}

	// No "es" rule applied: drop just the trailing "s".
	return string(runes[:n-1])
}
// isException reports whether word matches any entry of exceptions when
// both are compared rune by rune from the end.
//
// Only the overlapping tail is compared: a word longer than an exception
// matches if it ends with that exception, and a word shorter than an
// exception matches if the exception ends with the word. An empty word
// therefore matches any exception.
func isException(word []rune, exceptions []string) bool {
nextException:
	for _, candidate := range exceptions {
		tail := []rune(candidate)
		overlap := len(tail)
		if len(word) < overlap {
			overlap = len(word)
		}
		for back := 1; back <= overlap; back++ {
			if tail[len(tail)-back] != word[len(word)-back] {
				continue nextException
			}
		}
		return true
	}
	return false
}
================================================
FILE: analysis/lang/en/plural_stemmer_test.go
================================================
package en
import "testing"
// TestEnglishPluralStemmer checks stem against a table of input words and
// their expected stems.
func TestEnglishPluralStemmer(t *testing.T) {
	cases := []struct {
		In, Out string
	}{
		{In: "dresses", Out: "dress"},
		{In: "dress", Out: "dress"},
		{In: "axes", Out: "axe"},
		{In: "ad", Out: "ad"},
		{In: "ads", Out: "ad"},
		{In: "gas", Out: "ga"},
		{In: "sass", Out: "sass"},
		{In: "berries", Out: "berry"},
		{In: "dresses", Out: "dress"},
		{In: "spies", Out: "spy"},
		{In: "shoes", Out: "shoe"},
		{In: "headaches", Out: "headache"},
		{In: "computer", Out: "computer"},
		{In: "dressing", Out: "dressing"},
		{In: "clothes", Out: "clothe"},
		{In: "DRESSES", Out: "dress"},
		{In: "frog", Out: "frog"},
		{In: "dress", Out: "dress"},
		{In: "runs", Out: "run"},
		{In: "pies", Out: "pie"},
		{In: "foxes", Out: "fox"},
		{In: "axes", Out: "axe"},
		{In: "foes", Out: "fo"},
		{In: "dishes", Out: "dish"},
		{In: "snitches", Out: "snitch"},
		{In: "cliches", Out: "cliche"},
		{In: "forests", Out: "forest"},
		{In: "yes", Out: "ye"},
	}
	for i := range cases {
		want := cases[i].Out
		if got := stem(cases[i].In); got != want {
			t.Errorf("expected %v but got %v", want, got)
		}
	}
}
================================================
FILE: analysis/lang/en/possessive_filter_en.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// PossessiveName is the name PossessiveFilter is registered as
// in the bleve registry.
const PossessiveName = "possessive_en"

// The apostrophe-like runes recognised before a trailing "s". The
// non-ASCII ones are written as escapes so that they survive editors and
// tools that normalise look-alike characters to the ASCII apostrophe.
const rightSingleQuotationMark = '\u2019' // RIGHT SINGLE QUOTATION MARK
const apostrophe = '\''                   // APOSTROPHE (U+0027)
const fullWidthApostrophe = '\uFF07'      // FULLWIDTH APOSTROPHE

// apostropheChars is the arithmetic sum of the three code points above;
// adding rune constants yields a number, not a set of characters.
// NOTE(review): nothing in the visible code references it; it is kept
// only so that any existing use elsewhere keeps compiling.
const apostropheChars = rightSingleQuotationMark + apostrophe + fullWidthApostrophe
// PossessiveFilter implements a TokenFilter which
// strips the English possessive suffix ('s) from tokens.
// It handles a variety of apostrophe types, is case-insensitive
// and doesn't distinguish between possessive and contraction.
// (i.e. "She's So Rad" becomes "She So Rad")
type PossessiveFilter struct{}

// NewPossessiveFilter returns a new PossessiveFilter.
func NewPossessiveFilter() *PossessiveFilter {
	return new(PossessiveFilter)
}
// Filter removes a trailing apostrophe + "s" (or "S") from the term of each
// token in input and returns the same stream.
//
// The apostrophe may be the right single quotation mark, the ASCII
// apostrophe or the full-width apostrophe. Terms are shortened by
// re-slicing, so tokens are modified in place; a term consisting only of
// the suffix becomes empty.
func (s *PossessiveFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, tok := range input {
		term := tok.Term
		last, lastSize := utf8.DecodeLastRune(term)
		if last != 's' && last != 'S' {
			continue
		}
		// Inspect the rune immediately before the trailing s.
		beforeS := len(term) - lastSize
		prev, prevSize := utf8.DecodeLastRune(term[:beforeS])
		isApostrophe := prev == rightSingleQuotationMark ||
			prev == apostrophe ||
			prev == fullWidthApostrophe
		if isApostrophe {
			tok.Term = term[:beforeS-prevSize]
		}
	}
	return input
}
// PossessiveFilterConstructor is the registry constructor for
// PossessiveFilter. It ignores config and cache and never fails.
func PossessiveFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewPossessiveFilter()
	return filter, nil
}
// init registers PossessiveFilterConstructor under PossessiveName and
// panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(PossessiveName, PossessiveFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/en/possessive_filter_en_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestEnglishPossessiveFilter feeds the registered possessive filter terms
// that end in each supported apostrophe variant (in both cases), plus a few
// degenerate terms, and compares the result with the expected stream.
//
// The non-ASCII apostrophes are written as escapes so that the right
// single quotation mark (U+2019) and the full-width apostrophe (U+FF07)
// cases are really exercised and cannot silently collapse into the ASCII
// case.
func TestEnglishPossessiveFilter(t *testing.T) {
	// stream wraps each term in a token that has only Term set.
	stream := func(terms ...[]byte) analysis.TokenStream {
		rv := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			rv = append(rv, &analysis.Token{Term: term})
		}
		return rv
	}

	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input: stream(
				// ASCII apostrophe
				[]byte("marty's"), []byte("MARTY'S"),
				// right single quotation mark
				[]byte("marty\u2019s"), []byte("MARTY\u2019S"),
				// full-width apostrophe
				[]byte("marty\uFF07s"), []byte("MARTY\uFF07S"),
				// degenerate terms
				[]byte("m"), []byte("s"), []byte("'s"),
			),
			output: stream(
				[]byte("marty"), []byte("MARTY"),
				[]byte("marty"), []byte("MARTY"),
				[]byte("marty"), []byte("MARTY"),
				[]byte("m"), []byte("s"), []byte(""),
			),
		},
	}

	cache := registry.NewCache()
	stemmerFilter, err := cache.TokenFilterNamed(PossessiveName)
	if err != nil {
		t.Fatal(err)
	}
	for _, test := range tests {
		actual := stemmerFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %s, got %s", test.output, actual)
		}
	}
}
// BenchmarkEnglishPossessiveFilter measures the registered possessive
// filter on terms ending in each supported apostrophe variant.
//
// The filter shortens token terms in place, so a stream that has been
// filtered once no longer carries any suffix. Each iteration therefore
// points the tokens back at the original terms (a cheap slice-header
// assignment; the filter only re-slices and never writes to the bytes), so
// that every iteration exercises the stripping path.
func BenchmarkEnglishPossessiveFilter(b *testing.B) {
	terms := [][]byte{
		[]byte("marty's"),
		[]byte("MARTY'S"),
		[]byte("marty\u2019s"),
		[]byte("MARTY\u2019S"),
		[]byte("marty\uFF07s"),
		[]byte("MARTY\uFF07S"),
		[]byte("m"),
	}
	input := make(analysis.TokenStream, len(terms))
	for i, term := range terms {
		input[i] = &analysis.Token{Term: term}
	}

	cache := registry.NewCache()
	stemmerFilter, err := cache.TokenFilterNamed(PossessiveName)
	if err != nil {
		b.Fatal(err)
	}
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		for j, term := range terms {
			input[j].Term = term
		}
		stemmerFilter.Filter(input)
	}
}
================================================
FILE: analysis/lang/en/stemmer_en_snowball.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/english"
)
// SnowballStemmerName is the name under which the English Snowball stemmer
// token filter is registered.
const SnowballStemmerName = "stemmer_en_snowball"
// EnglishStemmerFilter is a stateless token filter that runs the Snowball
// English stemmer over token terms.
type EnglishStemmerFilter struct{}

// NewEnglishStemmerFilter returns a new EnglishStemmerFilter.
func NewEnglishStemmerFilter() *EnglishStemmerFilter {
	return new(EnglishStemmerFilter)
}
// Filter stems the term of every token in input with english.Stem and
// returns the same stream. A fresh snowballstem environment is created per
// token, and tokens are modified in place.
func (s *EnglishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		env := snowballstem.NewEnv(string(tok.Term))
		english.Stem(env)
		stemmed := env.Current()
		tok.Term = []byte(stemmed)
	}
	return input
}
// EnglishStemmerFilterConstructor is the registry constructor for
// EnglishStemmerFilter. It ignores config and cache and never fails.
func EnglishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewEnglishStemmerFilter()
	return filter, nil
}
// init registers EnglishStemmerFilterConstructor under SnowballStemmerName
// and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, EnglishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/en/stemmer_en_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSnowballEnglishStemmer runs the registered Snowball English stemmer
// over single-token streams and checks the resulting terms.
func TestSnowballEnglishStemmer(t *testing.T) {
	// single wraps term in a one-token stream with only Term set.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: single("enjoy"), output: single("enjoy")},
		{input: single("enjoyed"), output: single("enjoy")},
		{input: single("enjoyable"), output: single("enjoy")},
	}

	cache := registry.NewCache()
	filter, err := cache.TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}
	for i := range cases {
		want := cases[i].output
		got := filter.Filter(cases[i].input)
		if !reflect.DeepEqual(got, want) {
			t.Errorf("expected %s, got %s", want[0].Term, got[0].Term)
		}
	}
}
================================================
FILE: analysis/lang/en/stop_filter_en.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the English stop word token filter.
//
// It fetches the token map registered under StopName from cache and wraps
// it with stop.NewStopTokensFilter. The config argument is not consulted.
// Any error from the token map lookup is returned unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if
// the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/en/stop_words_en.go
================================================
package en
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for both the English stop word token
// map and the English stop token filter.
const StopName = "stop_en"
// EnglishStopWords is the built-in list of stopwords used by the "stop_en" TokenFilter.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) were changed to apostrophes (') so that the list can be
// embedded in a raw string literal.
var EnglishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/english/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An English stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| Many of the forms below are quite rare (e.g. "yourselves") but included for
| completeness.
| PRONOUNS FORMS
| 1st person sing
i | subject, always in upper case of course
me | object
my | possessive adjective
| the possessive pronoun 'mine' is best suppressed, because of the
| sense of coal-mine etc.
myself | reflexive
| 1st person plural
we | subject
| us | object
| care is required here because US = United States. It is usually
| safe to remove it if it is in lower case.
our | possessive adjective
ours | possessive pronoun
ourselves | reflexive
| second person (archaic 'thou' forms not included)
you | subject and object
your | possessive adjective
yours | possessive pronoun
yourself | reflexive (singular)
yourselves | reflexive (plural)
| third person singular
he | subject
him | object
his | possessive adjective and pronoun
himself | reflexive
she | subject
her | object and possessive adjective
hers | possessive pronoun
herself | reflexive
it | subject and object
its | possessive adjective
itself | reflexive
| third person plural
they | subject
them | object
their | possessive adjective
theirs | possessive pronoun
themselves | reflexive
| other forms (demonstratives, interrogatives)
what
which
who
whom
this
that
these
those
| VERB FORMS (using F.R. Palmer's nomenclature)
| BE
am | 1st person, present
is | -s form (3rd person, present)
are | present
was | 1st person, past
were | past
be | infinitive
been | past participle
being | -ing form
| HAVE
have | simple
has | -s form
had | past
having | -ing form
| DO
do | simple
does | -s form
did | past
doing | -ing form
| The forms below are, I believe, best omitted, because of the significant
| homonym forms:
| He made a WILL
| old tin CAN
| merry month of MAY
| a smell of MUST
| fight the good fight with all thy MIGHT
| would, could, should, ought might however be included
| | AUXILIARIES
| | WILL
|will
would
| | SHALL
|shall
should
| | CAN
|can
could
| | MAY
|may
|might
| | MUST
|must
| | OUGHT
ought
| COMPOUND FORMS, increasingly encountered nowadays in 'formal' writing
| pronoun + verb
i'm
you're
he's
she's
it's
we're
they're
i've
you've
we've
they've
i'd
you'd
he'd
she'd
we'd
they'd
i'll
you'll
he'll
she'll
we'll
they'll
| verb + negation
isn't
aren't
wasn't
weren't
hasn't
haven't
hadn't
doesn't
don't
didn't
| auxiliary + negation
won't
wouldn't
shan't
shouldn't
can't
cannot
couldn't
mustn't
| miscellaneous forms
let's
that's
who's
what's
here's
there's
when's
where's
why's
how's
| rarer forms
| daren't needn't
| doubtful forms
| oughtn't mightn't
| ARTICLES
a
an
the
| THE REST (Overlap among prepositions, conjunctions, adverbs etc is so
| high, that classification is pointless.)
and
but
if
or
because
as
until
while
of
at
by
for
with
about
against
between
into
through
during
before
after
above
below
to
from
up
down
in
out
on
off
over
under
again
further
then
once
here
there
when
where
why
how
all
any
both
each
few
more
most
other
some
such
no
nor
not
only
own
same
so
than
too
very
| Just for the record, the following words are among the commonest in English
| one
| every
| least
| less
| many
| now
| ever
| never
| say
| says
| said
| also
| get
| go
| goes
| just
| made
| make
| put
| see
| seen
| whether
| like
| well
| back
| even
| still
| way
| take
| since
| another
| however
| two
| three
| four
| five
| first
| second
| new
| old
| high
| long
`)
// TokenMapConstructor creates a new token map and loads EnglishStopWords
// into it with LoadBytes.
//
// The config and cache arguments are not consulted. The map is returned
// together with whatever error LoadBytes produced, so callers must check
// the error before relying on the map.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(EnglishStopWords); loadErr != nil {
		// The map is still handed back alongside the error.
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/es/analyzer_es.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the Spanish analyzer is registered.
const AnalyzerName = "es"
// AnalyzerConstructor assembles the Spanish analyzer from components held
// in cache.
//
// The tokenizer is the one named unicode.Name. The token filters are
// configured in this order: lowercase.Name, StopName, NormalizeName,
// LightStemmerName. Components are looked up in a slightly different order
// (the normalizer before the stop filter); the first lookup that fails
// aborts construction and its error is returned. The config argument is
// not consulted.
func AnalyzerConstructor(config map[string]interface{},
	cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	lower, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	normalizer, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}

	stopWords, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}

	stemmer, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		return nil, err
	}

	// Pipeline order intentionally differs from lookup order above.
	pipeline := []analysis.TokenFilter{lower, stopWords, normalizer, stemmer}
	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: pipeline,
	}, nil
}
// init registers AnalyzerConstructor with the registry under AnalyzerName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/es/analyzer_es_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSpanishAnalyzer runs single-word inputs through the registered "es"
// analyzer and compares the complete resulting token stream (term, position
// and byte offsets) against the expected one.
func TestSpanishAnalyzer(t *testing.T) {
	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{
			input: []byte("chicana"),
			output: analysis.TokenStream{
				{Term: []byte("chican"), Position: 1, Start: 0, End: 7},
			},
		},
		{
			input: []byte("chicano"),
			output: analysis.TokenStream{
				{Term: []byte("chican"), Position: 1, Start: 0, End: 7},
			},
		},
		// added by marty for better coverage
		{
			input: []byte("yeses"),
			output: analysis.TokenStream{
				{Term: []byte("yes"), Position: 1, Start: 0, End: 5},
			},
		},
		{
			input: []byte("jaeces"),
			output: analysis.TokenStream{
				{Term: []byte("jaez"), Position: 1, Start: 0, End: 6},
			},
		},
		{
			input: []byte("arcos"),
			output: analysis.TokenStream{
				{Term: []byte("arc"), Position: 1, Start: 0, End: 5},
			},
		},
		{
			input: []byte("caos"),
			output: analysis.TokenStream{
				{Term: []byte("caos"), Position: 1, Start: 0, End: 4},
			},
		},
		{
			input: []byte("parecer"),
			output: analysis.TokenStream{
				{Term: []byte("parecer"), Position: 1, Start: 0, End: 7},
			},
		},
	}

	cache := registry.NewCache()
	esAnalyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := esAnalyzer.Analyze(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %v, got %v", tc.output, got)
	}
}
================================================
FILE: analysis/lang/es/light_stemmer_es.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const LightStemmerName = "stemmer_es_light"
// SpanishLightStemmerFilter is a token filter that rewrites each token's term
// with the light Spanish suffix stripping implemented by stem. It carries no
// state.
type SpanishLightStemmerFilter struct {
}
// NewSpanishLightStemmerFilter returns a pointer to a new, empty
// SpanishLightStemmerFilter.
func NewSpanishLightStemmerFilter() *SpanishLightStemmerFilter {
	return new(SpanishLightStemmerFilter)
}
// Filter stems every token of input in place: each term is decoded to runes,
// passed through stem, and re-encoded with analysis.BuildTermFromRunes.
// The same stream that was passed in is returned.
func (s *SpanishLightStemmerFilter) Filter(
	input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		stemmed := stem(bytes.Runes(tok.Term))
		tok.Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// stem applies light Spanish suffix stripping to a word given as runes and
// returns the stemmed word as a prefix slice of input.
//
// Rules, in order:
//   - words shorter than five runes are returned untouched;
//   - a final 'o', 'a' or 'e' is dropped;
//   - for words ending in 's': "-eses" loses its last two runes, "-ces"
//     becomes "-z", and "-os", "-as", "-es" lose their last two runes;
//   - anything else is returned untouched.
//
// The "-ces" rule overwrites one rune of input, so the caller's slice may be
// modified.
func stem(input []rune) []rune {
	n := len(input)
	if n < 5 {
		return input
	}

	last := input[n-1]
	if last == 'o' || last == 'a' || last == 'e' {
		return input[:n-1]
	}
	if last != 's' {
		return input
	}

	penult := input[n-2]
	switch {
	case penult == 'e' && input[n-3] == 's' && input[n-4] == 'e':
		return input[:n-2]
	case penult == 'e' && input[n-3] == 'c':
		// plural "-ces" maps back to the singular "-z"
		input[n-3] = 'z'
		return input[:n-2]
	case penult == 'o', penult == 'a', penult == 'e':
		return input[:n-2]
	}
	return input
}
// SpanishLightStemmerFilterConstructor is the registry constructor for the
// Spanish light stemmer. It ignores config and cache and never fails.
func SpanishLightStemmerFilterConstructor(config map[string]interface{},
	cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewSpanishLightStemmerFilter()
	return filter, nil
}
// init registers SpanishLightStemmerFilterConstructor with the registry under
// LightStemmerName and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(LightStemmerName, SpanishLightStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/es/spanish_normalize.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const NormalizeName = "normalize_es"
// SpanishNormalizeFilter is a token filter that replaces accented lowercase
// vowels in each token's term with their unaccented form (see normalize).
// It carries no state.
type SpanishNormalizeFilter struct {
}
// NewSpanishNormalizeFilter returns a pointer to a new, empty
// SpanishNormalizeFilter.
func NewSpanishNormalizeFilter() *SpanishNormalizeFilter {
	return new(SpanishNormalizeFilter)
}
// Filter replaces the term of every token in input with its normalized form
// and returns the same stream.
func (s *SpanishNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = normalize(input[i].Term)
	}
	return input
}
// normalize decodes input into runes, folds the lowercase vowels carrying a
// grave, acute, circumflex or diaeresis accent to the plain vowel, and
// re-encodes the result with analysis.BuildTermFromRunes. All other runes,
// including uppercase accented letters, pass through unchanged.
func normalize(input []byte) []byte {
	folded := bytes.Runes(input)
	for idx, r := range folded {
		switch r {
		case 'à', 'á', 'â', 'ä':
			folded[idx] = 'a'
		case 'è', 'é', 'ê', 'ë':
			folded[idx] = 'e'
		case 'ì', 'í', 'î', 'ï':
			folded[idx] = 'i'
		case 'ò', 'ó', 'ô', 'ö':
			folded[idx] = 'o'
		case 'ù', 'ú', 'û', 'ü':
			folded[idx] = 'u'
		}
	}
	return analysis.BuildTermFromRunes(folded)
}
// NormalizerFilterConstructor is the registry constructor for the Spanish
// normalization filter. It ignores config and cache and never fails.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewSpanishNormalizeFilter()
	return filter, nil
}
// init registers NormalizerFilterConstructor with the registry under
// NormalizeName and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/es/spanish_normalize_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestSpanishNormalizeFilter feeds single-token streams through
// SpanishNormalizeFilter and checks that accented lowercase vowels are folded
// while everything else (including the empty term) is left as is.
func TestSpanishNormalizeFilter(t *testing.T) {
	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input:  analysis.TokenStream{{Term: []byte("Guía")}},
			output: analysis.TokenStream{{Term: []byte("Guia")}},
		},
		{
			input:  analysis.TokenStream{{Term: []byte("Belcebú")}},
			output: analysis.TokenStream{{Term: []byte("Belcebu")}},
		},
		{
			input:  analysis.TokenStream{{Term: []byte("Limón")}},
			output: analysis.TokenStream{{Term: []byte("Limon")}},
		},
		{
			input:  analysis.TokenStream{{Term: []byte("agüero")}},
			output: analysis.TokenStream{{Term: []byte("aguero")}},
		},
		{
			input:  analysis.TokenStream{{Term: []byte("laúd")}},
			output: analysis.TokenStream{{Term: []byte("laud")}},
		},
		// empty
		{
			input:  analysis.TokenStream{{Term: []byte("")}},
			output: analysis.TokenStream{{Term: []byte("")}},
		},
	}

	normalizer := NewSpanishNormalizeFilter()
	for _, tc := range cases {
		got := normalizer.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %#v, got %#v", tc.output, got)
		t.Errorf("expected %s(% x), got %s(% x)", tc.output[0].Term, tc.output[0].Term, got[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/es/stemmer_es_snowball.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/spanish"
)
const SnowballStemmerName = "stemmer_es_snowball"
// SpanishStemmerFilter is a token filter that stems each token's term with
// the snowball Spanish stemmer. It carries no state.
type SpanishStemmerFilter struct {
}
// NewSpanishStemmerFilter returns a pointer to a new, empty
// SpanishStemmerFilter.
func NewSpanishStemmerFilter() *SpanishStemmerFilter {
	return new(SpanishStemmerFilter)
}
// Filter stems every token of input in place. For each token a snowball
// environment is created from the term, spanish.Stem is run on it, and the
// environment's current text becomes the new term. The same stream that was
// passed in is returned.
func (s *SpanishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		stemEnv := snowballstem.NewEnv(string(input[i].Term))
		spanish.Stem(stemEnv)
		input[i].Term = []byte(stemEnv.Current())
	}
	return input
}
// SpanishStemmerFilterConstructor is the registry constructor for the
// snowball Spanish stemmer. It ignores config and cache and never fails.
func SpanishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewSpanishStemmerFilter()
	return filter, nil
}
// init registers SpanishStemmerFilterConstructor with the registry under
// SnowballStemmerName and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, SpanishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/es/stemmer_es_snowball_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSnowballSpanishStemmer resolves the snowball Spanish stemmer through a
// fresh registry cache and checks that three inflections of the same word
// all reduce to the stem "agres".
func TestSnowballSpanishStemmer(t *testing.T) {
	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input:  analysis.TokenStream{{Term: []byte("agresivos")}},
			output: analysis.TokenStream{{Term: []byte("agres")}},
		},
		{
			input:  analysis.TokenStream{{Term: []byte("agresivamente")}},
			output: analysis.TokenStream{{Term: []byte("agres")}},
		},
		{
			input:  analysis.TokenStream{{Term: []byte("agresividad")}},
			output: analysis.TokenStream{{Term: []byte("agres")}},
		},
	}

	cache := registry.NewCache()
	stemmer, err := cache.TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := stemmer.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/es/stop_filter_es.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Spanish stop-word filter. It looks up
// the token map registered as StopName in cache and wraps it in a stop-tokens
// filter; a failed lookup is returned as the error. config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{},
	cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return stop.NewStopTokensFilter(stopWords), nil
}
// init registers StopTokenFilterConstructor with the registry under StopName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/es/stop_words_es.go
================================================
package es
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_es"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be embedded in a Go raw string literal.
var SpanishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Spanish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The following is a ranked list (commonest to rarest) of stopwords
| deriving from a large sample of text.
| Extra words have been added at the end.
de | from, of
la | the, her
que | who, that
el | the
en | in
y | and
a | to
los | the, them
del | de + el
se | himself, from him etc
las | the, them
por | for, by, etc
un | a
para | for
con | with
no | no
una | a
su | his, her
al | a + el
| es from SER
lo | him
como | how
más | more
pero | pero
sus | su plural
le | to him, her
ya | already
o | or
| fue from SER
este | this
| ha from HABER
sí | himself etc
porque | because
esta | this
| son from SER
entre | between
| está from ESTAR
cuando | when
muy | very
sin | without
sobre | on
| ser from SER
| tiene from TENER
también | also
me | me
hasta | until
hay | there is/are
donde | where
| han from HABER
quien | whom, that
| están from ESTAR
| estado from ESTAR
desde | from
todo | all
nos | us
durante | during
| estados from ESTAR
todos | all
uno | a
les | to them
ni | nor
contra | against
otros | other
| fueron from SER
ese | that
eso | that
| había from HABER
ante | before
ellos | they
e | and (variant of y)
esto | this
mí | me
antes | before
algunos | some
qué | what?
unos | a
yo | I
otro | other
otras | other
otra | other
él | he
tanto | so much, many
esa | that
estos | these
mucho | much, many
quienes | who
nada | nothing
muchos | many
cual | who
| sea from SER
poco | few
ella | she
estar | to be
| haber from HABER
estas | these
| estaba from ESTAR
| estamos from ESTAR
algunas | some
algo | something
nosotros | we
| other forms
mi | me
mis | mi plural
tú | thou
te | thee
ti | thee
tu | thy
tus | tu plural
ellas | they
nosotras | we
vosotros | you
vosotras | you
os | you
mío | mine
mía |
míos |
mías |
tuyo | thine
tuya |
tuyos |
tuyas |
suyo | his, hers, theirs
suya |
suyos |
suyas |
nuestro | ours
nuestra |
nuestros |
nuestras |
vuestro | yours
vuestra |
vuestros |
vuestras |
esos | those
esas | those
| forms of estar, to be (not including the infinitive):
estoy
estás
está
estamos
estáis
están
esté
estés
estemos
estéis
estén
estaré
estarás
estará
estaremos
estaréis
estarán
estaría
estarías
estaríamos
estaríais
estarían
estaba
estabas
estábamos
estabais
estaban
estuve
estuviste
estuvo
estuvimos
estuvisteis
estuvieron
estuviera
estuvieras
estuviéramos
estuvierais
estuvieran
estuviese
estuvieses
estuviésemos
estuvieseis
estuviesen
estando
estado
estada
estados
estadas
estad
| forms of haber, to have (not including the infinitive):
he
has
ha
hemos
habéis
han
haya
hayas
hayamos
hayáis
hayan
habré
habrás
habrá
habremos
habréis
habrán
habría
habrías
habríamos
habríais
habrían
había
habías
habíamos
habíais
habían
hube
hubiste
hubo
hubimos
hubisteis
hubieron
hubiera
hubieras
hubiéramos
hubierais
hubieran
hubiese
hubieses
hubiésemos
hubieseis
hubiesen
habiendo
habido
habida
habidos
habidas
| forms of ser, to be (not including the infinitive):
soy
eres
es
somos
sois
son
sea
seas
seamos
seáis
sean
seré
serás
será
seremos
seréis
serán
sería
serías
seríamos
seríais
serían
era
eras
éramos
erais
eran
fui
fuiste
fue
fuimos
fuisteis
fueron
fuera
fueras
fuéramos
fuerais
fueran
fuese
fueses
fuésemos
fueseis
fuesen
siendo
sido
| sed also means 'thirst'
| forms of tener, to have (not including the infinitive):
tengo
tienes
tiene
tenemos
tenéis
tienen
tenga
tengas
tengamos
tengáis
tengan
tendré
tendrás
tendrá
tendremos
tendréis
tendrán
tendría
tendrías
tendríamos
tendríais
tendrían
tenía
tenías
teníamos
teníais
tenían
tuve
tuviste
tuvo
tuvimos
tuvisteis
tuvieron
tuviera
tuvieras
tuviéramos
tuvierais
tuvieran
tuviese
tuvieses
tuviésemos
tuvieseis
tuviesen
teniendo
tenido
tenida
tenidos
tenidas
tened
`)
// TokenMapConstructor creates a fresh token map and fills it from
// SpanishStopWords. Neither config nor cache is consulted.
//
// The map is returned together with whatever error LoadBytes reports, so on
// failure the caller still receives the (possibly partially loaded) map.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(SpanishStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor with the registry under StopName and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/eu/stop_filter_eu.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package eu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Basque stop-word filter. It looks up
// the token map registered as StopName in cache and wraps it in a stop-tokens
// filter; a failed lookup is returned as the error. config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	return stop.NewStopTokensFilter(stopWords), nil
}
// init registers StopTokenFilterConstructor with the registry under StopName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/eu/stop_words_eu.go
================================================
package eu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const StopName = "stop_eu"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be embedded in a Go raw string literal.
var BasqueStopWords = []byte(`# example set of basque stopwords
al
anitz
arabera
asko
baina
bat
batean
batek
bati
batzuei
batzuek
batzuetan
batzuk
bera
beraiek
berau
berauek
bere
berori
beroriek
beste
bezala
da
dago
dira
ditu
du
dute
edo
egin
ere
eta
eurak
ez
gainera
gu
gutxi
guzti
haiei
haiek
haietan
hainbeste
hala
han
handik
hango
hara
hari
hark
hartan
hau
hauei
hauek
hauetan
hemen
hemendik
hemengo
hi
hona
honek
honela
honetan
honi
hor
hori
horiei
horiek
horietan
horko
horra
horrek
horrela
horretan
horri
hortik
hura
izan
ni
noiz
nola
non
nondik
nongo
nor
nora
ze
zein
zen
zenbait
zenbat
zer
zergatik
ziren
zituen
zu
zuek
zuen
zuten
`)
// TokenMapConstructor creates a fresh token map and fills it from
// BasqueStopWords. Neither config nor cache is consulted.
//
// The map is returned together with whatever error LoadBytes reports, so on
// failure the caller still receives the (possibly partially loaded) map.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(BasqueStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor with the registry under StopName and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fa/analyzer_fa.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/char/zerowidthnonjoiner"
"github.com/blevesearch/bleve/v2/analysis/lang/ar"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
const AnalyzerName = "fa"
// AnalyzerConstructor assembles the Persian ("fa") analyzer from components
// resolved through cache: the zero-width-non-joiner char filter, the unicode
// tokenizer, and then the lowercase, Arabic normalization, Persian
// normalization and Persian stop-word token filters, applied in that order.
// config is not consulted.
//
// The first component lookup that fails aborts construction and its error is
// returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	zwnjFilter, err := cache.CharFilterNamed(zerowidthnonjoiner.Name)
	if err != nil {
		return nil, err
	}
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}
	arabicNorm, err := cache.TokenFilterNamed(ar.NormalizeName)
	if err != nil {
		return nil, err
	}
	persianNorm, err := cache.TokenFilterNamed(NormalizeName)
	if err != nil {
		return nil, err
	}
	lower, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}
	stopWords, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}

	return &analysis.DefaultAnalyzer{
		CharFilters: []analysis.CharFilter{
			zwnjFilter,
		},
		Tokenizer: tokenizer,
		TokenFilters: []analysis.TokenFilter{
			lower,
			arabicNorm,
			persianNorm,
			stopWords,
		},
	}, nil
}
// init registers AnalyzerConstructor with the registry under AnalyzerName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fa/analyzer_fa_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestPersianAnalyzerVerbs runs conjugated forms of one Persian verb through
// the registered "fa" analyzer. For every case it checks the number of tokens
// produced and then compares only the term bytes of each token.
func TestPersianAnalyzerVerbs(t *testing.T) {
	// single builds the one-token stream expected for a case; only its Term
	// is ever inspected below.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{{Term: []byte(term)}}
	}

	cases := []struct {
		text []byte
		want analysis.TokenStream
	}{
		// active present indicative
		{
			text: []byte("می‌خورد"),
			want: single("خورد"),
		},
		// active preterite indicative
		{
			text: []byte("خورد"),
			want: single("خورد"),
		},
		// active imperfective preterite indicative
		{
			text: []byte("می‌خورد"),
			want: single("خورد"),
		},
		// active future indicative
		{
			text: []byte("خواهد خورد"),
			want: single("خورد"),
		},
		// active present progressive indicative
		{
			text: []byte("دارد می‌خورد"),
			want: single("خورد"),
		},
		// active preterite progressive indicative
		{
			text: []byte("داشت می‌خورد"),
			want: single("خورد"),
		},
		// active perfect indicative
		{
			text: []byte("خورده‌است"),
			want: single("خورده"),
		},
		// active imperfective perfect indicative
		{
			text: []byte("می‌خورده‌است"),
			want: single("خورده"),
		},
		// active pluperfect indicative
		{
			text: []byte("خورده بود"),
			want: single("خورده"),
		},
		// active imperfective pluperfect indicative
		{
			text: []byte("می‌خورده بود"),
			want: single("خورده"),
		},
		// active preterite subjunctive
		{
			text: []byte("خورده باشد"),
			want: single("خورده"),
		},
		// active imperfective preterite subjunctive
		{
			text: []byte("می‌خورده باشد"),
			want: single("خورده"),
		},
		// active pluperfect subjunctive
		{
			text: []byte("خورده بوده باشد"),
			want: single("خورده"),
		},
		// active imperfective pluperfect subjunctive
		{
			text: []byte("می‌خورده بوده باشد"),
			want: single("خورده"),
		},
		// passive present indicative
		{
			text: []byte("خورده می‌شود"),
			want: single("خورده"),
		},
		// passive preterite indicative
		{
			text: []byte("خورده شد"),
			want: single("خورده"),
		},
		// passive imperfective preterite indicative
		{
			text: []byte("خورده می‌شد"),
			want: single("خورده"),
		},
		// passive perfect indicative
		{
			text: []byte("خورده شده‌است"),
			want: single("خورده"),
		},
		// passive imperfective perfect indicative
		{
			text: []byte("خورده می‌شده‌است"),
			want: single("خورده"),
		},
		// passive pluperfect indicative
		{
			text: []byte("خورده شده بود"),
			want: single("خورده"),
		},
		// passive imperfective pluperfect indicative
		{
			text: []byte("خورده می‌شده بود"),
			want: single("خورده"),
		},
		// passive future indicative
		{
			text: []byte("خورده خواهد شد"),
			want: single("خورده"),
		},
		// passive present progressive indicative
		{
			text: []byte("دارد خورده می‌شود"),
			want: single("خورده"),
		},
		// passive preterite progressive indicative
		{
			text: []byte("داشت خورده می‌شد"),
			want: single("خورده"),
		},
		// passive present subjunctive
		{
			text: []byte("خورده شود"),
			want: single("خورده"),
		},
		// passive preterite subjunctive
		{
			text: []byte("خورده شده باشد"),
			want: single("خورده"),
		},
		// passive imperfective preterite subjunctive
		{
			text: []byte("خورده می‌شده باشد"),
			want: single("خورده"),
		},
		// passive pluperfect subjunctive
		{
			text: []byte("خورده شده بوده باشد"),
			want: single("خورده"),
		},
		// passive imperfective pluperfect subjunctive
		{
			text: []byte("خورده می‌شده بوده باشد"),
			want: single("خورده"),
		},
		// active present subjunctive
		{
			text: []byte("بخورد"),
			want: single("بخورد"),
		},
	}

	cache := registry.NewCache()
	faAnalyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := faAnalyzer.Analyze(tc.text)
		if len(got) != len(tc.want) {
			t.Fatalf("expected length: %d, got %d", len(tc.want), len(got))
		}
		for idx := range got {
			wantTerm, gotTerm := tc.want[idx].Term, got[idx].Term
			if !reflect.DeepEqual(gotTerm, wantTerm) {
				t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
			}
		}
	}
}
// TestPersianAnalyzerVerbsDefective repeats the verb-conjugation checks for
// the "fa" analyzer with inputs whose parts are separated by ordinary spaces.
// For every case it checks the number of tokens produced and then compares
// only the term bytes of each token.
func TestPersianAnalyzerVerbsDefective(t *testing.T) {
	// single builds the one-token stream expected for a case; only its Term
	// is ever inspected below.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{{Term: []byte(term)}}
	}

	cases := []struct {
		text []byte
		want analysis.TokenStream
	}{
		// active present indicative
		{
			text: []byte("مي خورد"),
			want: single("خورد"),
		},
		// active preterite indicative
		{
			text: []byte("خورد"),
			want: single("خورد"),
		},
		// active imperfective preterite indicative
		{
			text: []byte("مي خورد"),
			want: single("خورد"),
		},
		// active future indicative
		{
			text: []byte("خواهد خورد"),
			want: single("خورد"),
		},
		// active present progressive indicative
		{
			text: []byte("دارد مي خورد"),
			want: single("خورد"),
		},
		// active preterite progressive indicative
		{
			text: []byte("داشت مي خورد"),
			want: single("خورد"),
		},
		// active perfect indicative
		{
			text: []byte("خورده است"),
			want: single("خورده"),
		},
		// active imperfective perfect indicative
		{
			text: []byte("مي خورده است"),
			want: single("خورده"),
		},
		// active pluperfect indicative
		{
			text: []byte("خورده بود"),
			want: single("خورده"),
		},
		// active imperfective pluperfect indicative
		{
			text: []byte("مي خورده بود"),
			want: single("خورده"),
		},
		// active preterite subjunctive
		{
			text: []byte("خورده باشد"),
			want: single("خورده"),
		},
		// active imperfective preterite subjunctive
		{
			text: []byte("مي خورده باشد"),
			want: single("خورده"),
		},
		// active pluperfect subjunctive
		{
			text: []byte("خورده بوده باشد"),
			want: single("خورده"),
		},
		// active imperfective pluperfect subjunctive
		{
			text: []byte("مي خورده بوده باشد"),
			want: single("خورده"),
		},
		// passive present indicative
		{
			text: []byte("خورده مي شود"),
			want: single("خورده"),
		},
		// passive preterite indicative
		{
			text: []byte("خورده شد"),
			want: single("خورده"),
		},
		// passive imperfective preterite indicative
		{
			text: []byte("خورده مي شد"),
			want: single("خورده"),
		},
		// passive perfect indicative
		{
			text: []byte("خورده شده است"),
			want: single("خورده"),
		},
		// passive imperfective perfect indicative
		{
			text: []byte("خورده مي شده است"),
			want: single("خورده"),
		},
		// passive pluperfect indicative
		{
			text: []byte("خورده شده بود"),
			want: single("خورده"),
		},
		// passive imperfective pluperfect indicative
		{
			text: []byte("خورده مي شده بود"),
			want: single("خورده"),
		},
		// passive future indicative
		{
			text: []byte("خورده خواهد شد"),
			want: single("خورده"),
		},
		// passive present progressive indicative
		{
			text: []byte("دارد خورده مي شود"),
			want: single("خورده"),
		},
		// passive preterite progressive indicative
		{
			text: []byte("داشت خورده مي شد"),
			want: single("خورده"),
		},
		// passive present subjunctive
		{
			text: []byte("خورده شود"),
			want: single("خورده"),
		},
		// passive preterite subjunctive
		{
			text: []byte("خورده شده باشد"),
			want: single("خورده"),
		},
		// passive imperfective preterite subjunctive
		{
			text: []byte("خورده مي شده باشد"),
			want: single("خورده"),
		},
		// passive pluperfect subjunctive
		{
			text: []byte("خورده شده بوده باشد"),
			want: single("خورده"),
		},
		// passive imperfective pluperfect subjunctive
		{
			text: []byte("خورده مي شده بوده باشد"),
			want: single("خورده"),
		},
		// active present subjunctive
		{
			text: []byte("بخورد"),
			want: single("بخورد"),
		},
	}

	cache := registry.NewCache()
	faAnalyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := faAnalyzer.Analyze(tc.text)
		if len(got) != len(tc.want) {
			t.Fatalf("expected length: %d, got %d", len(tc.want), len(got))
		}
		for idx := range got {
			wantTerm, gotTerm := tc.want[idx].Term, got[idx].Term
			if !reflect.DeepEqual(gotTerm, wantTerm) {
				t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
			}
		}
	}
}
// TestPersianAnalyzerOthers runs the analyzer registered under AnalyzerName
// over nouns, non-Persian text and a few other inputs, checking the number
// of produced tokens and the term of each one.
func TestPersianAnalyzerOthers(t *testing.T) {
	// expect builds the expected token stream; only Term is compared below.
	expect := func(terms ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			stream = append(stream, &analysis.Token{Term: []byte(term)})
		}
		return stream
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// nouns
		{input: []byte("برگ ها"), output: expect("برگ")},
		{input: []byte("برگها"), output: expect("برگ")},
		// non persian
		{input: []byte("English test."), output: expect("english", "test")},
		// others
		{input: []byte("خورده مي شده بوده باشد"), output: expect("خورده")},
		{input: []byte("برگها"), output: expect("برگ")},
	}

	cache := registry.NewCache()
	persian, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := persian.Analyze(tc.input)
		if len(got) != len(tc.output) {
			t.Fatalf("expected length: %d, got %d", len(tc.output), len(got))
		}
		for i := range got {
			want := tc.output[i].Term
			if reflect.DeepEqual(got[i].Term, want) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", want, want, got[i].Term, got[i].Term)
		}
	}
}
================================================
FILE: analysis/lang/fa/persian_normalize.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// NormalizeName is the name under which the Persian normalization token
// filter is registered.
const NormalizeName = "normalize_fa"
// Arabic-script code points that normalize inspects or produces.
const (
// Yeh (U+064A) is the replacement for FarsiYeh and YehBarree.
Yeh = '\u064A'
// FarsiYeh (U+06CC) is rewritten to Yeh.
FarsiYeh = '\u06CC'
// YehBarree (U+06D2) is rewritten to Yeh.
YehBarree = '\u06D2'
// Keheh (U+06A9) is rewritten to Kaf.
Keheh = '\u06A9'
// Kaf (U+0643) is the replacement for Keheh.
Kaf = '\u0643'
// HamzaAbove (U+0654) is removed from terms altogether.
HamzaAbove = '\u0654'
// HehYeh (U+06C0) is rewritten to Heh.
HehYeh = '\u06C0'
// HehGoal (U+06C1) is rewritten to Heh.
HehGoal = '\u06C1'
// Heh (U+0647) is the replacement for HehYeh and HehGoal.
Heh = '\u0647'
)
// PersianNormalizeFilter is a token filter whose Filter method rewrites each
// token's term with normalize. It has no fields.
type PersianNormalizeFilter struct {
}
// NewPersianNormalizeFilter returns a pointer to a freshly allocated
// PersianNormalizeFilter.
func NewPersianNormalizeFilter() *PersianNormalizeFilter {
	return new(PersianNormalizeFilter)
}
// Filter replaces the term of every token in input with its normalized form.
// Tokens are updated in place and the same stream is returned.
func (s *PersianNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for idx := range input {
		input[idx].Term = normalize(input[idx].Term)
	}
	return input
}
func normalize(input []byte) []byte {
runes := bytes.Runes(input)
for i := 0; i < len(runes); i++ {
switch runes[i] {
case FarsiYeh, YehBarree:
runes[i] = Yeh
case Keheh:
runes[i] = Kaf
case HehYeh, HehGoal:
runes[i] = Heh
case HamzaAbove: // necessary for HEH + HAMZA
runes = analysis.DeleteRune(runes, i)
i--
}
}
return analysis.BuildTermFromRunes(runes)
}
// NormalizerFilterConstructor is the registry constructor for the Persian
// normalization filter. It uses neither config nor cache and never returns
// an error.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewPersianNormalizeFilter()
	return filter, nil
}
// init registers NormalizerFilterConstructor under NormalizeName and panics
// if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fa/persian_normalize_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestPersianNormalizeFilter passes single-token streams through a
// PersianNormalizeFilter and compares the whole resulting stream with the
// expected one.
func TestPersianNormalizeFilter(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// FarsiYeh
		{input: single("های"), output: single("هاي")},
		// YehBarree
		{input: single("هاے"), output: single("هاي")},
		// Keheh
		{input: single("کشاندن"), output: single("كشاندن")},
		// HehYeh
		{input: single("كتابۀ"), output: single("كتابه")},
		// HehHamzaAbove
		{input: single("كتابهٔ"), output: single("كتابه")},
		// HehGoal
		{input: single("زادہ"), output: single("زاده")},
		// empty
		{input: single(""), output: single("")},
	}

	filter := NewPersianNormalizeFilter()
	for _, tc := range cases {
		got := filter.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %#v, got %#v", tc.output, got)
		t.Errorf("expected % x, got % x", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/fa/stop_filter_fa.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fa
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds a stop token filter from the token map
// the cache returns for StopName. A failed lookup is returned unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fa/stop_words_fa.go
================================================
package fa
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for the Persian stop word token map.
const StopName = "stop_fa"
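// PersianStopWords holds the raw Persian stop word list that
// TokenMapConstructor loads into a token map.
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Backticks (`) were changed to apostrophes (') so that the text can be
// embedded in a Go raw string literal.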
var PersianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
# Note: by default this file is used after normalization, so when adding entries
# to this file, use the arabic 'ي' instead of 'ی'
انان
نداشته
سراسر
خياه
ايشان
وي
تاكنون
بيشتري
دوم
پس
ناشي
وگو
يا
داشتند
سپس
هنگام
هرگز
پنج
نشان
امسال
ديگر
گروهي
شدند
چطور
ده
و
دو
نخستين
ولي
چرا
چه
وسط
ه
كدام
قابل
يك
رفت
هفت
همچنين
در
هزار
بله
بلي
شايد
اما
شناسي
گرفته
دهد
داشته
دانست
داشتن
خواهيم
ميليارد
وقتيكه
امد
خواهد
جز
اورده
شده
بلكه
خدمات
شدن
برخي
نبود
بسياري
جلوگيري
حق
كردند
نوعي
بعري
نكرده
نظير
نبايد
بوده
بودن
داد
اورد
هست
جايي
شود
دنبال
داده
بايد
سابق
هيچ
همان
انجا
كمتر
كجاست
گردد
كسي
تر
مردم
تان
دادن
بودند
سري
جدا
ندارند
مگر
يكديگر
دارد
دهند
بنابراين
هنگامي
سمت
جا
انچه
خود
دادند
زياد
دارند
اثر
بدون
بهترين
بيشتر
البته
به
براساس
بيرون
كرد
بعضي
گرفت
توي
اي
ميليون
او
جريان
تول
بر
مانند
برابر
باشيم
مدتي
گويند
اكنون
تا
تنها
جديد
چند
بي
نشده
كردن
كردم
گويد
كرده
كنيم
نمي
نزد
روي
قصد
فقط
بالاي
ديگران
اين
ديروز
توسط
سوم
ايم
دانند
سوي
استفاده
شما
كنار
داريم
ساخته
طور
امده
رفته
نخست
بيست
نزديك
طي
كنيد
از
انها
تمامي
داشت
يكي
طريق
اش
چيست
روب
نمايد
گفت
چندين
چيزي
تواند
ام
ايا
با
ان
ايد
ترين
اينكه
ديگري
راه
هايي
بروز
همچنان
پاعين
كس
حدود
مختلف
مقابل
چيز
گيرد
ندارد
ضد
همچون
سازي
شان
مورد
باره
مرسي
خويش
برخوردار
چون
خارج
شش
هنوز
تحت
ضمن
هستيم
گفته
فكر
بسيار
پيش
براي
روزهاي
انكه
نخواهد
بالا
كل
وقتي
كي
چنين
كه
گيري
نيست
است
كجا
كند
نيز
يابد
بندي
حتي
توانند
عقب
خواست
كنند
بين
تمام
همه
ما
باشند
مثل
شد
اري
باشد
اره
طبق
بعد
اگر
صورت
غير
جاي
بيش
ريزي
اند
زيرا
چگونه
بار
لطفا
مي
درباره
من
ديده
همين
گذاري
برداري
علت
گذاشته
هم
فوق
نه
ها
شوند
اباد
همواره
هر
اول
خواهند
چهار
نام
امروز
مان
هاي
قبل
كنم
سعي
تازه
را
هستند
زير
جلوي
عنوان
بود
`)
// TokenMapConstructor creates a new token map and loads PersianStopWords
// into it. The map is returned together with whatever error the load
// produced.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(PersianStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fi/analyzer_fi.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the Finnish analyzer is registered.
const AnalyzerName = "fi"
// AnalyzerConstructor assembles the Finnish analyzer: the unicode tokenizer
// followed by the lowercase, stop word and snowball stemmer filters, in that
// order. The first component the cache cannot provide aborts construction
// and its error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are looked up and applied in this order.
	filterNames := []string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, lookupErr := cache.TokenFilterNamed(name)
		if lookupErr != nil {
			return nil, lookupErr
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fi/analyzer_fi_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestFinishAnalyzer runs the analyzer registered under AnalyzerName over
// inflected Finnish words and a stop word, checking the number of produced
// tokens and the term of each one.
func TestFinishAnalyzer(t *testing.T) {
	// expect builds the expected token stream; only Term is compared below.
	expect := func(terms ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			stream = append(stream, &analysis.Token{Term: []byte(term)})
		}
		return stream
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("edeltäjiinsä"), output: expect("edeltäj")},
		{input: []byte("edeltäjistään"), output: expect("edeltäj")},
		// stop word
		{input: []byte("olla"), output: expect()},
	}

	cache := registry.NewCache()
	finnishAnalyzer, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := finnishAnalyzer.Analyze(tc.input)
		if len(got) != len(tc.output) {
			t.Fatalf("expected length: %d, got %d", len(tc.output), len(got))
		}
		for i := range got {
			want := tc.output[i].Term
			if reflect.DeepEqual(got[i].Term, want) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", want, want, got[i].Term, got[i].Term)
		}
	}
}
================================================
FILE: analysis/lang/fi/stemmer_fi.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/finnish"
)
// SnowballStemmerName is the name under which the Finnish snowball stemmer
// token filter is registered.
const SnowballStemmerName = "stemmer_fi_snowball"
// FinnishStemmerFilter is a token filter whose Filter method stems each
// token's term with the snowball Finnish stemmer. It has no fields.
type FinnishStemmerFilter struct {
}
// NewFinnishStemmerFilter returns a pointer to a freshly allocated
// FinnishStemmerFilter.
func NewFinnishStemmerFilter() *FinnishStemmerFilter {
	return new(FinnishStemmerFilter)
}
// Filter stems the term of every token in input with the snowball Finnish
// stemmer. Tokens are updated in place and the same stream is returned.
func (s *FinnishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		// The stemmer works on a string environment; convert in and back out.
		env := snowballstem.NewEnv(string(input[i].Term))
		finnish.Stem(env)
		input[i].Term = []byte(env.Current())
	}
	return input
}
// FinnishStemmerFilterConstructor is the registry constructor for the
// Finnish stemmer filter. It uses neither config nor cache and never returns
// an error.
func FinnishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewFinnishStemmerFilter()
	return filter, nil
}
// init registers FinnishStemmerFilterConstructor under SnowballStemmerName
// and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, FinnishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fi/stop_filter_fi.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds a stop token filter from the token map
// the cache returns for StopName. A failed lookup is returned unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fi/stop_words_fi.go
================================================
package fi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used for the Finnish stop word token map.
const StopName = "stop_fi"
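// FinnishStopWords holds the raw Finnish stop word list that
// TokenMapConstructor loads into a token map.
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) were changed to apostrophes (') so that the text can be
// embedded in a Go raw string literal.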
var FinnishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| forms of BE
olla
olen
olet
on
olemme
olette
ovat
ole | negative form
oli
olisi
olisit
olisin
olisimme
olisitte
olisivat
olit
olin
olimme
olitte
olivat
ollut
olleet
en | negation
et
ei
emme
ette
eivät
|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
minä minun minut minua minussa minusta minuun minulla minulta minulle | I
sinä sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
hän hänen hänet häntä hänessä hänestä häneen hänellä häneltä hänelle | he she
me meidän meidät meitä meissä meistä meihin meillä meiltä meille | we
te teidän teidät teitä teissä teistä teihin teillä teiltä teille | you
he heidän heidät heitä heissä heistä heihin heillä heiltä heille | they
tämä tämän tätä tässä tästä tähän tallä tältä tälle tänä täksi | this
tuo tuon tuotä tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
se sen sitä siinä siitä siihen sillä siltä sille sinä siksi | it
nämä näiden näitä näissä näistä näihin näillä näiltä näille näinä näiksi | these
nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
ne niiden niitä niissä niistä niihin niillä niiltä niille niinä niiksi | they
kuka kenen kenet ketä kenessä kenestä keneen kenellä keneltä kenelle kenenä keneksi| who
ketkä keiden ketkä keitä keissä keistä keihin keillä keiltä keille keinä keiksi | (pl)
mikä minkä minkä mitä missä mistä mihin millä miltä mille minä miksi | which what
mitkä | (pl)
joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
| conjunctions
että | that
ja | and
jos | if
koska | because
kuin | than
mutta | but
niin | so
sekä | and
sillä | for
tai | or
vaan | but
vai | or
vaikka | although
| prepositions
kanssa | with
mukaan | according to
noin | about
poikki | across
yli | over, across
| other
kun | when
niin | so
nyt | now
itse | self
`)
// TokenMapConstructor creates a new token map and loads FinnishStopWords
// into it. The map is returned together with whatever error the load
// produced.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(FinnishStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/analyzer_fr.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the French analyzer is registered.
const AnalyzerName = "fr"
// AnalyzerConstructor assembles the French analyzer: the unicode tokenizer
// followed by the lowercase, elision, stop word and light stemmer filters.
// The first component the cache cannot provide aborts construction and its
// error is returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Lookup order is elision, lowercase, stop words, stemmer. The pipeline
	// built below applies lowercase before elision.
	var elide, lower, stopWords, stemmer analysis.TokenFilter
	if elide, err = cache.TokenFilterNamed(ElisionName); err != nil {
		return nil, err
	}
	if lower, err = cache.TokenFilterNamed(lowercase.Name); err != nil {
		return nil, err
	}
	if stopWords, err = cache.TokenFilterNamed(StopName); err != nil {
		return nil, err
	}
	if stemmer, err = cache.TokenFilterNamed(LightStemmerName); err != nil {
		return nil, err
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{lower, elide, stopWords, stemmer},
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/analyzer_fr_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestFrenchAnalyzer feeds a table of inputs through the analyzer registered
// under AnalyzerName and checks the number of produced tokens and the term
// of each one.
func TestFrenchAnalyzer(t *testing.T) {
	// expect builds the expected token stream; only Term is compared below.
	expect := func(terms ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			stream = append(stream, &analysis.Token{Term: []byte(term)})
		}
		return stream
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		{input: []byte(""), output: expect()},
		{input: []byte("chien chat cheval"), output: expect("chien", "chat", "cheval")},
		{input: []byte("chien CHAT CHEVAL"), output: expect("chien", "chat", "cheval")},
		{input: []byte(" chien ,? + = - CHAT /: > CHEVAL"), output: expect("chien", "chat", "cheval")},
		{input: []byte("chien++"), output: expect("chien")},
		{input: []byte("mot \"entreguillemet\""), output: expect("mot", "entreguilemet")},
		{input: []byte("Jean-François"), output: expect("jean", "francoi")},
		// stop words
		{
			input:  []byte("le la chien les aux chat du des à cheval"),
			output: expect("chien", "chat", "cheval"),
		},
		// nouns and adjectives
		{
			input:  []byte("lances chismes habitable chiste éléments captifs"),
			output: expect("lanc", "chism", "habitabl", "chist", "element", "captif"),
		},
		// verbs
		{
			input:  []byte("finissions souffrirent rugissante"),
			output: expect("finision", "soufrirent", "rugisant"),
		},
		{
			input:  []byte("C3PO aujourd'hui oeuf ïâöûàä anticonstitutionnellement Java++ "),
			output: expect("c3po", "aujourd'hui", "oeuf", "ïaöuaä", "anticonstitutionel", "java"),
		},
		{input: []byte("propriétaire"), output: expect("proprietair")},
	}

	cache := registry.NewCache()
	french, err := cache.AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := french.Analyze(tc.input)
		if len(got) != len(tc.output) {
			t.Fatalf("expected length: %d, got %d", len(tc.output), len(got))
		}
		for i := range got {
			want := tc.output[i].Term
			if reflect.DeepEqual(got[i].Term, want) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", want, want, got[i].Term, got[i].Term)
		}
	}
}
================================================
FILE: analysis/lang/fr/articles_fr.go
================================================
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// ArticlesName is the registry name used for the French articles token map.
const ArticlesName = "articles_fr"
// FrenchArticles holds the raw list, one entry per line, that
// ArticlesTokenMapConstructor loads into a token map.
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var FrenchArticles = []byte(`
l
m
t
qu
n
s
j
d
c
jusqu
quoiqu
lorsqu
puisqu
`)
// ArticlesTokenMapConstructor creates a new token map and loads
// FrenchArticles into it. The map is returned together with whatever error
// the load produced.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	articles := analysis.NewTokenMap()
	loadErr := articles.LoadBytes(FrenchArticles)
	return articles, loadErr
}
// init registers ArticlesTokenMapConstructor under ArticlesName and panics
// if the registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/elision_fr.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
// ElisionName is the name under which the French elision token filter is
// registered.
const ElisionName = "elision_fr"
// ElisionFilterConstructor builds the French elision token filter from the
// token map the cache returns for ArticlesName.
//
// If the token map lookup fails, the error is wrapped with %w so callers can
// still reach the underlying cause through errors.Is / errors.As. The
// message text is the same as it would be with %v.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
	if err != nil {
		return nil, fmt.Errorf("error building elision filter: %w", err)
	}
	return elision.NewElisionFilter(articlesTokenMap), nil
}
// init registers ElisionFilterConstructor under ElisionName and panics if
// the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/elision_fr_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestFrenchElision passes a one-token stream through the filter registered
// under ElisionName and compares the whole resulting stream with the
// expected one.
func TestFrenchElision(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: single("l'avion"), output: single("avion")},
	}

	cache := registry.NewCache()
	elisionFilter, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := elisionFilter.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/fr/light_stemmer_fr.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"bytes"
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// LightStemmerName is the registry name the French analyzer uses to look up
// its light stemmer token filter.
const LightStemmerName = "stemmer_fr_light"
// FrenchLightStemmerFilter is a token filter whose Filter method runs stem
// over the runes of each token's term. It has no fields.
type FrenchLightStemmerFilter struct {
}
// NewFrenchLightStemmerFilter returns a pointer to a freshly allocated
// FrenchLightStemmerFilter.
func NewFrenchLightStemmerFilter() *FrenchLightStemmerFilter {
	return new(FrenchLightStemmerFilter)
}
// Filter decodes each token's term into runes, passes them through stem and
// re-encodes the result as the token's new term. Tokens are updated in place
// and the same stream is returned.
func (s *FrenchLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		stemmed := stem(bytes.Runes(input[i].Term))
		input[i].Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// stem applies the French light suffix rules to input and returns the result
// of passing the shortened term through norm.
//
// The rules are tried in a fixed order and most of them return as soon as
// they match, so earlier rules take precedence over later ones. input is
// modified in place: suffix runes are overwritten and the slice is re-sliced
// to a shorter length.
func stem(input []rune) []rune {
inputLen := len(input)
// Plural in 'x': "-aux" becomes "-al" unless preceded by 'e' ("-eaux"),
// then the trailing 'x' is dropped.
if inputLen > 5 && input[inputLen-1] == 'x' {
if input[inputLen-3] == 'a' && input[inputLen-2] == 'u' && input[inputLen-4] != 'e' {
input[inputLen-2] = 'l'
}
input = input[0 : inputLen-1]
inputLen = len(input)
}
// Drop a trailing 'x' from terms too short for the rule above.
if inputLen > 3 && input[inputLen-1] == 'x' {
input = input[0 : inputLen-1]
inputLen = len(input)
}
// Drop a trailing plural 's'.
if inputLen > 3 && input[inputLen-1] == 's' {
input = input[0 : inputLen-1]
inputLen = len(input)
}
// "-issement" -> "-ir"
if inputLen > 9 && analysis.RunesEndsWith(input, "issement") {
input = input[0 : inputLen-6]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
// "-issant" -> "-ir"
if inputLen > 8 && analysis.RunesEndsWith(input, "issant") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
// "-ement" -> "-e"; if that leaves "-ive", it becomes "-if".
if inputLen > 6 && analysis.RunesEndsWith(input, "ement") {
input = input[0 : inputLen-4]
inputLen = len(input)
if inputLen > 3 && analysis.RunesEndsWith(input, "ive") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'f'
}
return norm(input)
}
// "-ficatrice" -> "-fier"
if inputLen > 11 && analysis.RunesEndsWith(input, "ficatrice") {
input = input[0 : inputLen-5]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
// "-ficateur" -> "-fier"
if inputLen > 10 && analysis.RunesEndsWith(input, "ficateur") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
// "-catrice" -> "-quer"
if inputLen > 9 && analysis.RunesEndsWith(input, "catrice") {
input = input[0 : inputLen-3]
inputLen = len(input)
input[inputLen-4] = 'q'
input[inputLen-3] = 'u'
input[inputLen-2] = 'e'
// input[inputLen-1] is already 'r', so it needs no assignment.
return norm(input)
}
// "-cateur" -> "-quer"
if inputLen > 8 && analysis.RunesEndsWith(input, "cateur") {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-4] = 'q'
input[inputLen-3] = 'u'
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
// "-atrice" -> "-er"
if inputLen > 8 && analysis.RunesEndsWith(input, "atrice") {
input = input[0 : inputLen-4]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
// "-ateur" -> "-er"
if inputLen > 7 && analysis.RunesEndsWith(input, "ateur") {
input = input[0 : inputLen-3]
inputLen = len(input)
input[inputLen-2] = 'e'
input[inputLen-1] = 'r'
return norm(input)
}
// "-trice" -> "-teur". This rule does not return, so the rewritten term
// is still examined by the rules below.
if inputLen > 6 && analysis.RunesEndsWith(input, "trice") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-3] = 'e'
input[inputLen-2] = 'u'
input[inputLen-1] = 'r'
}
// Remove "-ième".
if inputLen > 5 && analysis.RunesEndsWith(input, "ième") {
return norm(input[0 : inputLen-4])
}
// "-teuse" -> "-ter"
if inputLen > 7 && analysis.RunesEndsWith(input, "teuse") {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
// "-teur" -> "-ter"
if inputLen > 6 && analysis.RunesEndsWith(input, "teur") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'r'
return norm(input)
}
// "-euse" -> "-eu"
if inputLen > 5 && analysis.RunesEndsWith(input, "euse") {
return norm(input[0 : inputLen-2])
}
// "-ère" -> "-er"
if inputLen > 8 && analysis.RunesEndsWith(input, "ère") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-2] = 'e'
return norm(input)
}
// "-ive" -> "-if"
if inputLen > 7 && analysis.RunesEndsWith(input, "ive") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-1] = 'f'
return norm(input)
}
// "-folle" -> "-fou", "-molle" -> "-mou"
if inputLen > 4 &&
(analysis.RunesEndsWith(input, "folle") ||
analysis.RunesEndsWith(input, "molle")) {
input = input[0 : inputLen-2]
inputLen = len(input)
input[inputLen-1] = 'u'
return norm(input)
}
// "-nnelle" -> "-n"
if inputLen > 9 && analysis.RunesEndsWith(input, "nnelle") {
return norm(input[0 : inputLen-5])
}
// "-nnel" -> "-n"
if inputLen > 9 && analysis.RunesEndsWith(input, "nnel") {
return norm(input[0 : inputLen-3])
}
// "-ète" -> "-et". This rule does not return.
if inputLen > 4 && analysis.RunesEndsWith(input, "ète") {
input = input[0 : inputLen-1]
inputLen = len(input)
input[inputLen-2] = 'e'
}
// Remove "-ique". This rule does not return.
if inputLen > 8 && analysis.RunesEndsWith(input, "ique") {
input = input[0 : inputLen-4]
inputLen = len(input)
}
// "-esse" -> "-e"
if inputLen > 8 && analysis.RunesEndsWith(input, "esse") {
return norm(input[0 : inputLen-3])
}
// "-inage" -> "-in"
if inputLen > 7 && analysis.RunesEndsWith(input, "inage") {
return norm(input[0 : inputLen-3])
}
// Remove "-isation"; if the remainder ends in "-ual" it becomes "-uel".
if inputLen > 9 && analysis.RunesEndsWith(input, "isation") {
input = input[0 : inputLen-7]
inputLen = len(input)
if inputLen > 5 && analysis.RunesEndsWith(input, "ual") {
input[inputLen-2] = 'e'
}
return norm(input)
}
// Remove "-isateur".
if inputLen > 9 && analysis.RunesEndsWith(input, "isateur") {
return norm(input[0 : inputLen-7])
}
// Remove "-ation".
if inputLen > 8 && analysis.RunesEndsWith(input, "ation") {
return norm(input[0 : inputLen-5])
}
// Remove "-ition".
if inputLen > 8 && analysis.RunesEndsWith(input, "ition") {
return norm(input[0 : inputLen-5])
}
// No suffix rule returned early: normalize what is left.
return norm(input)
}
// norm normalizes a term in place and returns the (possibly shorter) result,
// which shares input's backing array.
//
// For terms longer than four runes it first folds the accented runes
// à/á/â, ô, è/é/ê, ù/û, î and ç to their unaccented form, then collapses every
// run of an identical letter into a single rune (runs of non-letters such as
// digits are kept). Afterwards, while the term is still longer than four
// runes, it strips a trailing "ie", then one trailing 'r', up to two trailing
// 'e' and finally one rune of a doubled final letter.
func norm(input []rune) []rune {
	if len(input) > 4 {
		for i, r := range input {
			switch r {
			case 'à', 'á', 'â':
				input[i] = 'a'
			case 'ô':
				input[i] = 'o'
			case 'è', 'é', 'ê':
				input[i] = 'e'
			case 'ù', 'û':
				input[i] = 'u'
			case 'î':
				input[i] = 'i'
			case 'ç':
				input[i] = 'c'
			}
		}

		// Collapse repeated letters only once every rune has been folded.
		// Doing it per folded rune is quadratic and, because deletions shift
		// the remaining runes left, lets some runes escape folding.
		// kept aliases input; its write position never passes the read
		// position, so compacting in place is safe.
		kept := input[:1]
		for _, r := range input[1:] {
			if prev := kept[len(kept)-1]; r == prev && unicode.IsLetter(prev) {
				continue
			}
			kept = append(kept, r)
		}
		input = kept
	}

	if n := len(input); n > 4 && input[n-2] == 'i' && input[n-1] == 'e' {
		input = input[:n-2]
	}

	// The length is checked once; at most four runes are removed below, so
	// the index expressions stay in range.
	if len(input) > 4 {
		if input[len(input)-1] == 'r' {
			input = input[:len(input)-1]
		}
		if input[len(input)-1] == 'e' {
			input = input[:len(input)-1]
		}
		if input[len(input)-1] == 'e' {
			input = input[:len(input)-1]
		}
		if last := input[len(input)-1]; last == input[len(input)-2] && unicode.IsLetter(last) {
			input = input[:len(input)-1]
		}
	}
	return input
}
// FrenchLightStemmerFilterConstructor builds a French light stemmer token
// filter. Neither config nor cache is consulted, and the returned error is
// always nil.
func FrenchLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewFrenchLightStemmerFilter()
	return filter, nil
}
// init registers FrenchLightStemmerFilterConstructor under LightStemmerName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(LightStemmerName, FrenchLightStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/light_stemmer_fr_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestFrenchLightStemmer runs the token filter registered under
// LightStemmerName over single-token streams and compares each result with
// the expected stem.
func TestFrenchLightStemmer(t *testing.T) {
	// stream wraps one term into a one-token stream.
	stream := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct{ in, want string }{
		{"chevaux", "cheval"},
		{"cheval", "cheval"},
		{"hiboux", "hibou"},
		{"hibou", "hibou"},
		{"chantés", "chant"},
		{"chanter", "chant"},
		{"chante", "chant"},
		{"chant", "chant"},
		{"baronnes", "baron"},
		{"barons", "baron"},
		{"baron", "baron"},
		{"peaux", "peau"},
		{"peau", "peau"},
		{"anneaux", "aneau"},
		{"anneau", "aneau"},
		{"neveux", "neveu"},
		{"neveu", "neveu"},
		{"affreux", "afreu"},
		{"affreuse", "afreu"},
		{"investissement", "investi"},
		{"investir", "investi"},
		{"assourdissant", "asourdi"},
		{"assourdir", "asourdi"},
		{"pratiquement", "pratiqu"},
		{"pratique", "pratiqu"},
		{"administrativement", "administratif"},
		{"administratif", "administratif"},
		{"justificatrice", "justifi"},
		{"justificateur", "justifi"},
		{"justifier", "justifi"},
		{"educatrice", "eduqu"},
		{"eduquer", "eduqu"},
		{"communicateur", "comuniqu"},
		{"communiquer", "comuniqu"},
		{"accompagnatrice", "acompagn"},
		{"accompagnateur", "acompagn"},
		{"administrateur", "administr"},
		{"administrer", "administr"},
		{"productrice", "product"},
		{"producteur", "product"},
		{"acheteuse", "achet"},
		{"acheteur", "achet"},
		{"planteur", "plant"},
		{"plante", "plant"},
		{"poreuse", "poreu"},
		{"poreux", "poreu"},
		{"plieuse", "plieu"},
		{"bijoutière", "bijouti"},
		{"bijoutier", "bijouti"},
		{"caissière", "caisi"},
		{"caissier", "caisi"},
		{"abrasive", "abrasif"},
		{"abrasif", "abrasif"},
		{"folle", "fou"},
		{"fou", "fou"},
		{"personnelle", "person"},
		{"personne", "person"},
		// algo bug: too short length
		// {"personnel", "person"},
		{"complète", "complet"},
		{"complet", "complet"},
		{"aromatique", "aromat"},
		{"faiblesse", "faibl"},
		{"faible", "faibl"},
		{"patinage", "patin"},
		{"patin", "patin"},
		{"sonorisation", "sono"},
		{"ritualisation", "rituel"},
		{"rituel", "rituel"},
		// algo bug: masked by rules above
		// {"colonisateur", "colon"},
		{"nomination", "nomin"},
		{"disposition", "dispos"},
		{"dispose", "dispos"},

		// SOLR-3463 : abusive compression of repeated characters in numbers
		// Trailing repeated char elision :
		{"1234555", "1234555"},
		// Repeated char within numbers with more than 4 characters :
		{"12333345", "12333345"},
		// Short numbers weren't affected already:
		{"1234", "1234"},

		// Ensure behaviour is preserved for words!
		// Trailing repeated char elision :
		{"abcdeff", "abcdef"},
		// Repeated char within words with more than 4 characters :
		{"abcccddeef", "abcdef"},
		{"créées", "cre"},

		// Combined letter and digit repetition
		// 10:00pm
		{"22hh00", "22h00"},

		// bug #214
		{"propriétaire", "proprietair"},
	}

	filter, err := registry.NewCache().TokenFilterNamed(LightStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, c := range cases {
		want := stream(c.want)
		got := filter.Filter(stream(c.in))
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %s, got %s", want[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/fr/minimal_stemmer_fr.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
	"bytes"
	"unicode"

	"github.com/blevesearch/bleve/v2/analysis"
	"github.com/blevesearch/bleve/v2/registry"
)
// MinimalStemmerName is the name under which the French minimal stemmer token
// filter is registered.
const MinimalStemmerName = "stemmer_fr_min"

// FrenchMinimalStemmerFilter is a token filter that applies the French
// minimal stemming rules to every token. It carries no state.
type FrenchMinimalStemmerFilter struct{}

// NewFrenchMinimalStemmerFilter returns a new FrenchMinimalStemmerFilter.
func NewFrenchMinimalStemmerFilter() *FrenchMinimalStemmerFilter {
	return new(FrenchMinimalStemmerFilter)
}
// Filter replaces the term of every token in input with its minimally
// stemmed form and returns the same stream.
func (s *FrenchMinimalStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		stemmed := minstem(bytes.Runes(input[i].Term))
		input[i].Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// minstem applies the French minimal stemming rules to input and returns the
// stemmed prefix, which shares input's backing array.
//
// Terms shorter than six runes are returned unchanged. A term ending in 'x'
// loses the 'x', and a preceding "au" is rewritten to "al" in place
// ("chevaux" -> "cheval"). Otherwise one trailing 's', 'r', 'e' and 'é' are
// stripped, in that order and each at most once, followed by one rune of a
// doubled final letter. The doubled-rune rule applies to letters only, so
// numeric terms such as "1234555" are not shortened.
func minstem(input []rune) []rune {
	n := len(input)
	if n < 6 {
		return input
	}

	if input[n-1] == 'x' {
		if input[n-3] == 'a' && input[n-2] == 'u' {
			input[n-2] = 'l'
		}
		return input[:n-1]
	}

	// At most four runes are removed from a term of at least six, so the
	// index expressions below stay in range.
	for _, suffix := range "sreé" {
		if input[n-1] == suffix {
			n--
		}
	}
	if last := input[n-1]; last == input[n-2] && unicode.IsLetter(last) {
		n--
	}
	return input[:n]
}
// FrenchMinimalStemmerFilterConstructor builds a French minimal stemmer token
// filter. Neither config nor cache is consulted, and the returned error is
// always nil.
func FrenchMinimalStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewFrenchMinimalStemmerFilter()
	return filter, nil
}
// init registers FrenchMinimalStemmerFilterConstructor under
// MinimalStemmerName and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(MinimalStemmerName, FrenchMinimalStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/minimal_stemmer_fr_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestFrenchMinimalStemmer runs the token filter registered under
// MinimalStemmerName over single-token streams and compares each result with
// the expected stem.
func TestFrenchMinimalStemmer(t *testing.T) {
	// stream wraps one term into a one-token stream.
	stream := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct{ in, want string }{
		{"chevaux", "cheval"},
		{"hiboux", "hibou"},
		{"chantés", "chant"},
		{"chanter", "chant"},
		{"chante", "chant"},
		{"baronnes", "baron"},
		{"barons", "baron"},
		{"baron", "baron"},
	}

	filter, err := registry.NewCache().TokenFilterNamed(MinimalStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, c := range cases {
		want := stream(c.want)
		got := filter.Filter(stream(c.in))
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %s, got %s", want[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/fr/stemmer_fr_snowball.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/french"
)
// SnowballStemmerName is the name under which the French snowball stemmer
// token filter is registered.
const SnowballStemmerName = "stemmer_fr_snowball"

// FrenchStemmerFilter is a token filter that stems every token with the
// snowball French stemmer. It carries no state.
type FrenchStemmerFilter struct{}

// NewFrenchStemmerFilter returns a new FrenchStemmerFilter.
func NewFrenchStemmerFilter() *FrenchStemmerFilter {
	return new(FrenchStemmerFilter)
}
// Filter runs the snowball French stemmer over the term of every token in
// input, stores the stemmed text back on the token and returns the same
// stream.
func (s *FrenchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		env := snowballstem.NewEnv(string(input[i].Term))
		french.Stem(env)
		input[i].Term = []byte(env.Current())
	}
	return input
}
// FrenchStemmerFilterConstructor builds a French snowball stemmer token
// filter. Neither config nor cache is consulted, and the returned error is
// always nil.
func FrenchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewFrenchStemmerFilter()
	return filter, nil
}
// init registers FrenchStemmerFilterConstructor under SnowballStemmerName and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, FrenchStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/stemmer_fr_snowball_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSnowballFrenchStemmer runs the token filter registered under
// SnowballStemmerName over single-token streams and compares each result with
// the expected stem.
func TestSnowballFrenchStemmer(t *testing.T) {
	// stream wraps one term into a one-token stream.
	stream := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct{ in, want string }{
		{"antagoniste", "antagon"},
		{"barbouillait", "barbouill"},
		{"calculateur", "calcul"},
	}

	filter, err := registry.NewCache().TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, c := range cases {
		want := stream(c.want)
		got := filter.Filter(stream(c.in))
		if reflect.DeepEqual(got, want) {
			continue
		}
		t.Errorf("expected %s, got %s", want[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/fr/stop_filter_fr.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor looks up the token map named StopName in cache
// and wraps it in a stop token filter. A failed lookup is returned as the
// error with a nil filter. config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/fr/stop_words_fr.go
================================================
package fr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which both the French stop word token map and
// the French stop token filter are registered.
const StopName = "stop_fr"
// FrenchStopWords is the raw French stop word list in snowball format: text
// after a vertical bar is a comment and each stop word starts a line.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks in the original were replaced with single quotes so the text can
// be embedded in a raw string literal.
var FrenchStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A French stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
au | a + le
aux | a + les
avec | with
ce | this
ces | these
dans | with
de | of
des | de + les
du | de + le
elle | she
en | 'of them' etc
et | and
eux | them
il | he
je | I
la | the
le | the
leur | their
lui | him
ma | my (fem)
mais | but
me | me
même | same; as in moi-même (myself) etc
mes | me (pl)
moi | me
mon | my (masc)
ne | not
nos | our (pl)
notre | our
nous | we
on | one
ou | where
par | by
pas | not
pour | for
qu | que before vowel
que | that
qui | who
sa | his, her (fem)
se | oneself
ses | his (pl)
son | his, her (masc)
sur | on
ta | thy (fem)
te | thee
tes | thy (pl)
toi | thee
ton | thy (masc)
tu | thou
un | a
une | a
vos | your (pl)
votre | your
vous | you
| single letter forms
c | c'
d | d'
j | j'
l | l'
à | to, at
m | m'
n | n'
s | s'
t | t'
y | there
| forms of être (not including the infinitive):
été
étée
étées
étés
étant
suis
es
est
sommes
êtes
sont
serai
seras
sera
serons
serez
seront
serais
serait
serions
seriez
seraient
étais
était
étions
étiez
étaient
fus
fut
fûmes
fûtes
furent
sois
soit
soyons
soyez
soient
fusse
fusses
fût
fussions
fussiez
fussent
| forms of avoir (not including the infinitive):
ayant
eu
eue
eues
eus
ai
as
avons
avez
ont
aurai
auras
aura
aurons
aurez
auront
aurais
aurait
aurions
auriez
auraient
avais
avait
avions
aviez
avaient
eut
eûmes
eûtes
eurent
aie
aies
ait
ayons
ayez
aient
eusse
eusses
eût
eussions
eussiez
eussent
| Later additions (from Jean-Christophe Deschamps)
ceci | this
cela | that
celà | that
cet | this
cette | this
ici | here
ils | they
les | the (pl)
leurs | their (pl)
quel | which
quels | which
quelle | which
quelles | which
sans | without
soi | oneself
`)
// TokenMapConstructor creates a new token map and loads FrenchStopWords into
// it. The config and cache arguments are not used. The map is returned
// together with whatever error LoadBytes reports.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(FrenchStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ga/articles_ga.go
================================================
package ga
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// ArticlesName is the registry name of the Irish articles token map; the
// elision filter in this package looks the map up under this name.
const ArticlesName = "articles_ga"
// IrishArticles is the raw article list, one entry per line, that
// ArticlesTokenMapConstructor loads into a token map.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var IrishArticles = []byte(`
d
m
b
`)
// ArticlesTokenMapConstructor creates a new token map and loads IrishArticles
// into it. The config and cache arguments are not used. The map is returned
// together with whatever error LoadBytes reports.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	articles := analysis.NewTokenMap()
	loadErr := articles.LoadBytes(IrishArticles)
	return articles, loadErr
}
// init registers ArticlesTokenMapConstructor under ArticlesName and panics if
// the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ga/elision_ga.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ga
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
// ElisionName is the registry name of the Irish elision token filter.
const ElisionName = "elision_ga"

// ElisionFilterConstructor builds the Irish elision token filter from the
// token map registered under ArticlesName.
//
// The config argument is not used. If the articles token map cannot be
// obtained from cache, a nil filter and a wrapped error are returned.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
	if err != nil {
		// %w (rather than %v) keeps the underlying error reachable through
		// errors.Is / errors.As; the rendered message is unchanged.
		return nil, fmt.Errorf("error building elision filter: %w", err)
	}
	return elision.NewElisionFilter(articlesTokenMap), nil
}
// init registers ElisionFilterConstructor under ElisionName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ga/elision_ga_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ga
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestFrenchElision(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("b'fhearr"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("fhearr"),
},
},
},
}
cache := registry.NewCache()
elisionFilter, err := cache.TokenFilterNamed(ElisionName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := elisionFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/ga/stop_filter_ga.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ga
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Irish stop-word token filter from the
// token map registered under StopName. The config argument is not used; a
// failed token map lookup is returned to the caller unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ga/stop_words_ga.go
================================================
package ga
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used in this package for both the Irish stop
// word token map and the stop token filter built from it.
const StopName = "stop_ga"
// IrishStopWords is the raw Irish stop word list, one word per line, that
// TokenMapConstructor loads into a token map.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var IrishStopWords = []byte(`
a
ach
ag
agus
an
aon
ar
arna
as
b'
ba
beirt
bhúr
caoga
ceathair
ceathrar
chomh
chtó
chuig
chun
cois
céad
cúig
cúigear
d'
daichead
dar
de
deich
deichniúr
den
dhá
do
don
dtí
dá
dár
dó
faoi
faoin
faoina
faoinár
fara
fiche
gach
gan
go
gur
haon
hocht
i
iad
idir
in
ina
ins
inár
is
le
leis
lena
lenár
m'
mar
mo
mé
na
nach
naoi
naonúr
ná
ní
níor
nó
nócha
ocht
ochtar
os
roimh
sa
seacht
seachtar
seachtó
seasca
seisear
siad
sibh
sinn
sna
sé
sí
tar
thar
thú
triúr
trí
trína
trínár
tríocha
tú
um
ár
é
éis
í
ó
ón
óna
ónár
`)
// TokenMapConstructor creates a new token map and loads IrishStopWords into
// it. The config and cache arguments are not used. The map is returned
// together with whatever error LoadBytes reports.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(IrishStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/gl/stop_filter_gl.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Galician stop-word token filter from
// the token map registered under StopName. The config argument is not used; a
// failed token map lookup is returned to the caller unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/gl/stop_words_gl.go
================================================
package gl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used in this package for both the Galician
// stop word token map and the stop token filter built from it.
const StopName = "stop_gl"
// GalicianStopWords is the raw Galician stop word list, one word per line,
// that TokenMapConstructor loads into a token map. The leading '#' line is
// part of the data; presumably the loader treats it as a comment (verify
// against TokenMap.LoadBytes).
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var GalicianStopWords = []byte(`# galican stopwords
a
aínda
alí
aquel
aquela
aquelas
aqueles
aquilo
aquí
ao
aos
as
así
á
ben
cando
che
co
coa
comigo
con
connosco
contigo
convosco
coas
cos
cun
cuns
cunha
cunhas
da
dalgunha
dalgunhas
dalgún
dalgúns
das
de
del
dela
delas
deles
desde
deste
do
dos
dun
duns
dunha
dunhas
e
el
ela
elas
eles
en
era
eran
esa
esas
ese
eses
esta
estar
estaba
está
están
este
estes
estiven
estou
eu
é
facer
foi
foron
fun
había
hai
iso
isto
la
las
lle
lles
lo
los
mais
me
meu
meus
min
miña
miñas
moi
na
nas
neste
nin
no
non
nos
nosa
nosas
noso
nosos
nós
nun
nunha
nuns
nunhas
o
os
ou
ó
ós
para
pero
pode
pois
pola
polas
polo
polos
por
que
se
senón
ser
seu
seus
sexa
sido
sobre
súa
súas
tamén
tan
te
ten
teñen
teño
ter
teu
teus
ti
tido
tiña
tiven
túa
túas
un
unha
unhas
uns
vos
vosa
vosas
voso
vosos
vós
`)
// TokenMapConstructor creates a new token map and loads GalicianStopWords
// into it. The config and cache arguments are not used. The map is returned
// together with whatever error LoadBytes reports.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	loadErr := stopWords.LoadBytes(GalicianStopWords)
	return stopWords, loadErr
}
// init registers TokenMapConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hi/analyzer_hi.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/lang/in"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name of the Hindi analyzer.
const AnalyzerName = "hi"

// AnalyzerConstructor assembles the Hindi analyzer: the unicode tokenizer
// followed by the lowercase, Indic-normalize, Hindi-normalize, Hindi stop
// word and Hindi stemmer token filters, in that order.
//
// The config argument is not used. Every component is looked up in cache by
// name; the first lookup that fails aborts construction and its error is
// returned unchanged.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Listed in the order the filters are applied, which is also the order
	// in which they are resolved.
	filterNames := []string{
		lowercase.Name,
		in.NormalizeName,
		NormalizeName,
		StopName,
		StemmerName,
	}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hi/analyzer_hi_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestHindiAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// two ways to write 'hindi' itself
{
input: []byte("हिन्दी"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("हिंद"),
Position: 1,
Start: 0,
End: 18,
},
},
},
{
input: []byte("हिंदी"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("हिंद"),
Position: 1,
Start: 0,
End: 15,
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %v, got %v", test.output, actual)
}
}
}
================================================
FILE: analysis/lang/hi/hindi_normalize.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// NormalizeName is the registry name of the Hindi normalization token filter.
const NormalizeName = "normalize_hi"

// HindiNormalizeFilter is a stateless token filter; its Filter method rewrites
// every token's term through normalize.
type HindiNormalizeFilter struct{}

// NewHindiNormalizeFilter returns a ready-to-use HindiNormalizeFilter.
func NewHindiNormalizeFilter() *HindiNormalizeFilter {
	return new(HindiNormalizeFilter)
}
// Filter replaces the term of every token in input with its normalized form.
// Tokens are modified in place and the same stream is returned.
func (s *HindiNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = normalize(input[i].Term)
	}
	return input
}
// normalize folds spelling variants in a Hindi term and returns the rebuilt
// term. It decodes input into runes, walks them once, and per rune either
// substitutes another code point or deletes it:
//   - n followed by virama becomes bindu (anusvara); candrabindu becomes bindu
//   - nukta signs are dropped and nukta-composed letters map to their base letter
//   - zero-width joiner / non-joiner and virama are dropped
//   - chandra/short vowels and long vowels (independent and dependent forms)
//     are folded to a single representative each
//
// Deletions go through analysis.DeleteRune, which presumably removes the rune
// at the given index and returns the shortened slice (defined outside this
// file; confirm there).
func normalize(input []byte) []byte {
runes := bytes.Runes(input)
// Index-based loop: cases below shrink runes while iterating, so both the
// bound and the index have to be re-read/adjusted on each pass.
for i := 0; i < len(runes); i++ {
switch runes[i] {
// dead n -> bindu
case '\u0928':
// Only an n that is immediately followed by a virama is rewritten; the
// virama itself is removed so the pair collapses to one rune.
if i+1 < len(runes) && runes[i+1] == '\u094D' {
runes[i] = '\u0902'
runes = analysis.DeleteRune(runes, i+1)
}
// candrabindu -> bindu
case '\u0901':
runes[i] = '\u0902'
// nukta deletions
case '\u093C':
runes = analysis.DeleteRune(runes, i)
// Step back so the loop's i++ lands on the rune that now occupies index i.
i--
case '\u0929':
runes[i] = '\u0928'
case '\u0931':
runes[i] = '\u0930'
case '\u0934':
runes[i] = '\u0933'
case '\u0958':
runes[i] = '\u0915'
case '\u0959':
runes[i] = '\u0916'
case '\u095A':
runes[i] = '\u0917'
case '\u095B':
runes[i] = '\u091C'
case '\u095C':
runes[i] = '\u0921'
case '\u095D':
runes[i] = '\u0922'
case '\u095E':
runes[i] = '\u092B'
case '\u095F':
runes[i] = '\u092F'
// zwj/zwnj -> delete
case '\u200D', '\u200C':
runes = analysis.DeleteRune(runes, i)
i--
// virama -> delete
case '\u094D':
runes = analysis.DeleteRune(runes, i)
i--
// chandra/short -> replace
case '\u0945', '\u0946':
runes[i] = '\u0947'
case '\u0949', '\u094A':
runes[i] = '\u094B'
case '\u090D', '\u090E':
runes[i] = '\u090F'
case '\u0911', '\u0912':
runes[i] = '\u0913'
case '\u0972':
runes[i] = '\u0905'
// long -> short ind. vowels
case '\u0906':
runes[i] = '\u0905'
case '\u0908':
runes[i] = '\u0907'
case '\u090A':
runes[i] = '\u0909'
case '\u0960':
runes[i] = '\u090B'
case '\u0961':
runes[i] = '\u090C'
case '\u0910':
runes[i] = '\u090F'
case '\u0914':
runes[i] = '\u0913'
// long -> short dep. vowels
case '\u0940':
runes[i] = '\u093F'
case '\u0942':
runes[i] = '\u0941'
case '\u0944':
runes[i] = '\u0943'
case '\u0963':
runes[i] = '\u0962'
case '\u0948':
runes[i] = '\u0947'
case '\u094C':
runes[i] = '\u094B'
}
}
return analysis.BuildTermFromRunes(runes)
}
// NormalizerFilterConstructor returns a new HindiNormalizeFilter. The config
// and cache arguments are not used and the returned error is always nil.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewHindiNormalizeFilter()
	return filter, nil
}
// init registers NormalizerFilterConstructor under NormalizeName and panics if
// the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hi/hindi_normalize_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestHindiNormalizeFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// basics
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अँगरेज़ी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अँगरेजी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अँग्रेज़ी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अँग्रेजी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेज़ी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंग्रेज़ी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंग्रेजी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अंगरेजि"),
},
},
},
// test decompositions
// removing nukta dot
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("क़िताब"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताब"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("फ़र्ज़"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("फरज"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("क़र्ज़"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("करज"),
},
},
},
// some other composed nukta forms
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ऱऴख़ग़ड़ढ़य़"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("रळखगडढय"),
},
},
},
// removal of format (ZWJ/ZWNJ)
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("शार्मा"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("शारमा"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("शार्मा"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("शारमा"),
},
},
},
// removal of chandra
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ॅॆॉॊऍऎऑऒ\u0972"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ेेोोएएओओअ"),
},
},
},
// vowel shortening
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("आईऊॠॡऐऔीूॄॣैौ"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("अइउऋऌएओिुृॢेो"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
hindiNormalizeFilter := NewHindiNormalizeFilter()
for _, test := range tests {
actual := hindiNormalizeFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/hi/hindi_stemmer_filter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StemmerName is the registry name of the Hindi stemmer token filter.
const StemmerName = "stemmer_hi"

// HindiStemmerFilter is a stateless token filter; its Filter method stems the
// term of every token that is not flagged as a keyword.
type HindiStemmerFilter struct{}

// NewHindiStemmerFilter returns a ready-to-use HindiStemmerFilter.
func NewHindiStemmerFilter() *HindiStemmerFilter {
	return new(HindiStemmerFilter)
}
// Filter stems the term of every token in input, leaving tokens flagged as
// keywords untouched. Tokens are modified in place and the same stream is
// returned.
func (s *HindiStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		if token.KeyWord {
			// protected keyword: keep the term as is
			continue
		}
		token.Term = stem(token.Term)
	}
	return input
}
// stem removes at most one inflectional suffix from input.
//
// Suffixes are grouped by length in runes (5 down to 1) and the groups are
// tried longest first. A group is only considered when the term is more than
// one rune longer than the group's suffix length, i.e. at least two runes
// remain. On the first match the term is cut by that many runes via
// analysis.TruncateRunes; with no match input is returned unchanged.
func stem(input []byte) []byte {
	runeCount := utf8.RuneCount(input)

	// endsWithAny reports whether input ends with at least one of suffixes.
	endsWithAny := func(suffixes ...string) bool {
		for _, suffix := range suffixes {
			if bytes.HasSuffix(input, []byte(suffix)) {
				return true
			}
		}
		return false
	}

	switch {
	// 5
	case runeCount > 6 && endsWithAny(
		"ाएंगी", "ाएंगे", "ाऊंगी", "ाऊंगा", "ाइयाँ", "ाइयों", "ाइयां",
	):
		return analysis.TruncateRunes(input, 5)

	// 4
	case runeCount > 5 && endsWithAny(
		"ाएगी", "ाएगा", "ाओगी", "ाओगे", "एंगी", "ेंगी",
		"एंगे", "ेंगे", "ूंगी", "ूंगा", "ातीं", "नाओं",
		"नाएं", "ताओं", "ताएं", "ियाँ", "ियों", "ियां",
	):
		return analysis.TruncateRunes(input, 4)

	// 3
	case runeCount > 4 && endsWithAny(
		"ाकर", "ाइए", "ाईं", "ाया", "ेगी", "ेगा", "ोगी",
		"ोगे", "ाने", "ाना", "ाते", "ाती", "ाता", "तीं",
		"ाओं", "ाएं", "ुओं", "ुएं", "ुआं",
	):
		return analysis.TruncateRunes(input, 3)

	// 2
	case runeCount > 3 && endsWithAny(
		"कर", "ाओ", "िए", "ाई", "ाए", "ने", "नी", "ना",
		"ते", "ीं", "ती", "ता", "ाँ", "ां", "ों", "ें",
	):
		return analysis.TruncateRunes(input, 2)

	// 1
	case runeCount > 2 && endsWithAny(
		"ो", "े", "ू", "ु", "ी", "ि", "ा",
	):
		return analysis.TruncateRunes(input, 1)
	}

	return input
}
// StemmerFilterConstructor returns a new HindiStemmerFilter. The config and
// cache arguments are not used and the returned error is always nil.
func StemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewHindiStemmerFilter()
	return filter, nil
}
// init registers StemmerFilterConstructor under StemmerName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StemmerName, StemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hi/hindi_stemmer_filter_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestHindiStemmerFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
// masc noun inflections
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडका"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडके"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडकों"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("गुरु"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("गुर"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("गुरुओं"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("गुर"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("दोस्त"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("दोस्त"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("दोस्तों"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("दोस्त"),
},
},
},
// feminine noun inflections
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडकी"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडकियों"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("लडक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताब"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताब"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताबें"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताब"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताबों"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("किताब"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("आध्यापीका"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("आध्यापीक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("आध्यापीकाएं"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("आध्यापीक"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("आध्यापीकाओं"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("आध्यापीक"),
},
},
},
// some verb forms
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("खाना"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("खा"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("खाता"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("खा"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("खाती"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("खा"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("खा"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("खा"),
},
},
},
// exceptions
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("कठिनाइयां"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("कठिन"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("कठिन"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("कठिन"),
},
},
},
// empty
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte(""),
},
},
},
}
hindiStemmerFilter := NewHindiStemmerFilter()
for _, test := range tests {
actual := hindiStemmerFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %#v, got %#v", test.output, actual)
t.Errorf("expected % x, got % x", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/hi/stop_filter_hi.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Hindi stop-word token filter from the
// token map registered under StopName. The config argument is not used; a
// failed token map lookup is returned to the caller unchanged.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hi/stop_words_hi.go
================================================
package hi
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used in this package for both the Hindi stop
// word token map and the stop token filter built from it.
const StopName = "stop_hi"
// HindiStopWords is the raw Hindi stop word list, one word per line, that
// TokenMapConstructor loads into a token map. Lines starting with '#' are
// part of the data; presumably the loader treats them as comments (verify
// against TokenMap.LoadBytes). The second section repeats entries in the
// spelling produced by Hindi normalization, as the embedded header explains.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var HindiStopWords = []byte(`# Also see http://www.opensource.org/licenses/bsd-license.html
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# This file was created by Jacques Savoy and is distributed under the BSD license.
# Note: by default this file also contains forms normalized by HindiNormalizer
# for spelling variation (see section below), such that it can be used whether or
# not you enable that feature. When adding additional entries to this list,
# please add the normalized form as well.
अंदर
अत
अपना
अपनी
अपने
अभी
आदि
आप
इत्यादि
इन
इनका
इन्हीं
इन्हें
इन्हों
इस
इसका
इसकी
इसके
इसमें
इसी
इसे
उन
उनका
उनकी
उनके
उनको
उन्हीं
उन्हें
उन्हों
उस
उसके
उसी
उसे
एक
एवं
एस
ऐसे
और
कई
कर
करता
करते
करना
करने
करें
कहते
कहा
का
काफ़ी
कि
कितना
किन्हें
किन्हों
किया
किर
किस
किसी
किसे
की
कुछ
कुल
के
को
कोई
कौन
कौनसा
गया
घर
जब
जहाँ
जा
जितना
जिन
जिन्हें
जिन्हों
जिस
जिसे
जीधर
जैसा
जैसे
जो
तक
तब
तरह
तिन
तिन्हें
तिन्हों
तिस
तिसे
तो
था
थी
थे
दबारा
दिया
दुसरा
दूसरे
दो
द्वारा
न
नहीं
ना
निहायत
नीचे
ने
पर
पर
पहले
पूरा
पे
फिर
बनी
बही
बहुत
बाद
बाला
बिलकुल
भी
भीतर
मगर
मानो
मे
में
यदि
यह
यहाँ
यही
या
यिह
ये
रखें
रहा
रहे
ऱ्वासा
लिए
लिये
लेकिन
व
वर्ग
वह
वह
वहाँ
वहीं
वाले
वुह
वे
वग़ैरह
संग
सकता
सकते
सबसे
सभी
साथ
साबुत
साभ
सारा
से
सो
ही
हुआ
हुई
हुए
है
हैं
हो
होता
होती
होते
होना
होने
# additional normalized forms of the above
अपनि
जेसे
होति
सभि
तिंहों
इंहों
दवारा
इसि
किंहें
थि
उंहों
ओर
जिंहें
वहिं
अभि
बनि
हि
उंहिं
उंहें
हें
वगेरह
एसे
रवासा
कोन
निचे
काफि
उसि
पुरा
भितर
हे
बहि
वहां
कोइ
यहां
जिंहों
तिंहें
किसि
कइ
यहि
इंहिं
जिधर
इंहें
अदि
इतयादि
हुइ
कोनसा
इसकि
दुसरे
जहां
अप
किंहों
उनकि
भि
वरग
हुअ
जेसा
नहिं
`)
// TokenMapConstructor creates a new token map and loads HindiStopWords into
// it. config and cache are not consulted. The map is returned together with
// whatever error LoadBytes reported (nil on success).
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(HindiStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hr/analyzer_hr.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name of the Croatian analyzer built by
// AnalyzerConstructor.
//
// Originated from: http://nlp.ffzg.hr/resources/tools/stemmer-for-croatian/
const AnalyzerName = "hr"
// AnalyzerConstructor assembles the Croatian analyzer from components held in
// cache: the unicode tokenizer followed, in this order, by the lowercase
// filter, the Croatian stop filter, the suffix transformation filter and the
// Croatian stemmer. config is not consulted.
//
// The first lookup that fails aborts construction and its error is returned
// with a nil analyzer.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// The order of the names is the order in which the filters are applied.
	filterNames := []string{
		lowercase.Name,
		StopName,
		SuffixTransformationFilterName,
		StemmerName,
	}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hr/analyzer_hr_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestCroatianAnalyzer feeds sample words through the analyzer registered as
// AnalyzerName and compares the produced terms with the expected ones. A
// mismatch in the number of tokens aborts the test; a mismatching term is
// reported and the test continues.
func TestCroatianAnalyzer(t *testing.T) {
	// single builds the expected stream for an input that yields one token.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("Hrvatska"), output: single("hrvatsk")},
		{input: []byte("Hrvatski"), output: single("hrvatsk")},
		// uppercase letters
		{input: []byte("KOMARAC"), output: single("komarc")},
		// vowelR
		{input: []byte("crvi"), output: single("crv")},
		// stop word
		{input: []byte("biti"), output: analysis.TokenStream{}},
		// suffix transformation
		{input: []byte("zaključcima"), output: single("zaključk")},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if len(got) != len(tc.output) {
			t.Fatalf("expected length: %d, got %d", len(tc.output), len(got))
		}
		for idx := range got {
			want, have := tc.output[idx].Term, got[idx].Term
			if reflect.DeepEqual(have, want) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", want, want, have, have)
		}
	}
}
================================================
FILE: analysis/lang/hr/stemmer_hr.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hr
import (
"regexp"
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StemmerName is the registry name of the Croatian stemmer token filter.
const StemmerName = "stemmer_hr"
// stemmingRules is the ordered list of suffix-stripping patterns tried by
// stem; the first pattern that matches and yields an acceptable root wins, so
// the order of the entries is significant.
//
// Every pattern is anchored to the whole term. Capture group 1 is the
// candidate root that stem returns; the final group lists the endings that
// are dropped. Many ending lists finish with an empty alternative, so such a
// rule also matches a term that already equals its root.
//
// These regular expressions rules originated from:
// http://nlp.ffzg.hr/resources/tools/stemmer-for-croatian/
var stemmingRules = []*regexp.Regexp{
regexp.MustCompile(`^(.+(s|š)k)(ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|i|e|o|a|u)$`),
regexp.MustCompile(`^(.+(s|š)tv)(ima|om|o|a|u)$`),
regexp.MustCompile(`^(.+(t|m|p|r|g)anij)(ama|ima|om|a|u|e|i|)$`),
regexp.MustCompile(`^(.+an)(inom|ina|inu|ine|ima|in|om|u|i|a|e|)$`),
regexp.MustCompile(`^(.+in)(ima|ama|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+on)(ovima|ova|ove|ovi|ima|om|a|e|i|u|)$`),
regexp.MustCompile(`^(.+n)(ijima|ijega|ijemu|ijeg|ijem|ijim|ijih|ijoj|iji|ije|ija|iju|ima|ome|omu|oga|oj|om|ih|im|og|o|e|a|u|i|)$`),
regexp.MustCompile(`^(.+(a|e|u)ć)(oga|ome|omu|ega|emu|ima|oj|ih|om|eg|em|og|uh|im|e|a)$`),
regexp.MustCompile(`^(.+ugov)(ima|i|e|a)$`),
regexp.MustCompile(`^(.+ug)(ama|om|a|e|i|u|o)$`),
regexp.MustCompile(`^(.+log)(ama|om|a|u|e|)$`),
regexp.MustCompile(`^(.+[^eo]g)(ovima|ama|ovi|ove|ova|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+(rrar|ott|ss|ll)i)(jem|ja|ju|o|)$`),
regexp.MustCompile(`^(.+uj)(ući|emo|ete|mo|em|eš|e|u|)$`),
regexp.MustCompile(`^(.+(c|č|ć|đ|l|r)aj)(evima|evi|eva|eve|ama|ima|em|a|e|i|u|)$`),
regexp.MustCompile(`^(.+(b|c|d|l|n|m|ž|g|f|p|r|s|t|z)ij)(ima|ama|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+[^z]nal)(ima|ama|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+ijal)(ima|ama|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+ozil)(ima|om|a|e|u|i|)$`),
regexp.MustCompile(`^(.+olov)(ima|i|a|e)$`),
regexp.MustCompile(`^(.+ol)(ima|om|a|u|e|i|)$`),
regexp.MustCompile(`^(.+lem)(ama|ima|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+ram)(ama|om|a|e|i|u|o)$`),
regexp.MustCompile(`^(.+(a|d|e|o)r)(ama|ima|om|u|a|e|i|)$`),
regexp.MustCompile(`^(.+(e|i)s)(ima|om|e|a|u)$`),
regexp.MustCompile(`^(.+(t|n|j|k|j|t|b|g|v)aš)(ama|ima|om|em|a|u|i|e|)$`),
regexp.MustCompile(`^(.+(e|i)š)(ima|ama|om|em|i|e|a|u|)$`),
regexp.MustCompile(`^(.+ikat)(ima|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+lat)(ima|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+et)(ama|ima|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+(e|i|k|o)st)(ima|ama|om|a|e|i|u|o|)$`),
regexp.MustCompile(`^(.+išt)(ima|em|a|e|u)$`),
regexp.MustCompile(`^(.+ova)(smo|ste|hu|ti|še|li|la|le|lo|t|h|o)$`),
regexp.MustCompile(`^(.+(a|e|i)v)(ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|ama|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o|)$`),
regexp.MustCompile(`^(.+[^dkml]ov)(ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o|)$`),
regexp.MustCompile(`^(.+(m|l)ov)(ima|om|a|u|e|i|)$`),
regexp.MustCompile(`^(.+el)(ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o|)$`),
regexp.MustCompile(`^(.+(a|e|š)nj)(ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|ega|emu|eg|em|im|ih|oj|om|og|a|e|i|o|u)$`),
regexp.MustCompile(`^(.+čin)(ama|ome|omu|oga|ima|og|om|im|ih|oj|a|u|i|o|e|)$`),
regexp.MustCompile(`^(.+roši)(vši|smo|ste|še|mo|te|ti|li|la|lo|le|m|š|t|h|o)$`),
regexp.MustCompile(`^(.+oš)(ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|)$`),
regexp.MustCompile(`^(.+(e|o)vit)(ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|i|e|o|a|u|)$`),
regexp.MustCompile(`^(.+ast)(ijima|ijega|ijemu|ijem|ijim|ijih|ijoj|ijeg|iji|ije|ija|oga|ome|omu|ima|og|om|im|ih|oj|i|e|o|a|u|)$`),
regexp.MustCompile(`^(.+k)(ijemu|ijima|ijega|ijeg|ijem|ijim|ijih|ijoj|oga|ome|omu|ima|iji|ije|ija|iju|im|ih|oj|om|og|i|a|u|e|o|)$`),
regexp.MustCompile(`^(.+(e|a|i|u)va)(jući|smo|ste|jmo|jte|ju|la|le|li|lo|mo|na|ne|ni|no|te|ti|še|hu|h|j|m|n|o|t|v|š|)$`),
regexp.MustCompile(`^(.+ir)(ujemo|ujete|ujući|ajući|ivat|ujem|uješ|ujmo|ujte|avši|asmo|aste|ati|amo|ate|aju|aše|ahu|ala|alo|ali|ale|uje|uju|uj|al|an|am|aš|at|ah|ao)$`),
regexp.MustCompile(`^(.+ač)(ismo|iste|iti|imo|ite|iše|eći|ila|ilo|ili|ile|ena|eno|eni|ene|io|im|iš|it|ih|en|i|e)$`),
regexp.MustCompile(`^(.+ača)(vši|smo|ste|smo|ste|hu|ti|mo|te|še|la|lo|li|le|ju|na|no|ni|ne|o|m|š|t|h|n)$`),
regexp.MustCompile(`^(.+n)(uvši|usmo|uste|ući|imo|ite|emo|ete|ula|ulo|ule|uli|uto|uti|uta|em|eš|uo|ut|e|u|i)$`),
regexp.MustCompile(`^(.+ni)(vši|smo|ste|ti|mo|te|mo|te|la|lo|le|li|m|š|o)$`),
regexp.MustCompile(`^(.+((a|r|i|p|e|u)st|[^o]g|ik|uc|oj|aj|lj|ak|ck|čk|šk|uk|nj|im|ar|at|et|št|it|ot|ut|zn|zv)a)(jući|vši|smo|ste|jmo|jte|jem|mo|te|je|ju|ti|še|hu|la|li|le|lo|na|no|ni|ne|t|h|o|j|n|m|š)$`),
regexp.MustCompile(`^(.+ur)(ajući|asmo|aste|ajmo|ajte|amo|ate|aju|ati|aše|ahu|ala|ali|ale|alo|ana|ano|ani|ane|al|at|ah|ao|aj|an|am|aš)$`),
regexp.MustCompile(`^(.+(a|i|o)staj)(asmo|aste|ahu|ati|emo|ete|aše|ali|ući|ala|alo|ale|mo|ao|em|eš|at|ah|te|e|u|)$`),
regexp.MustCompile(`^(.+(b|c|č|ć|d|e|f|g|j|k|n|r|t|u|v)a)(lama|lima|lom|lu|li|la|le|lo|l)$`),
regexp.MustCompile(`^(.+(t|č|j|ž|š)aj)(evima|evi|eva|eve|ama|ima|em|a|e|i|u|)$`),
regexp.MustCompile(`^(.+([^o]m|ič|nč|uč|b|c|ć|d|đ|h|j|k|l|n|p|r|s|š|v|z|ž)a)(jući|vši|smo|ste|jmo|jte|mo|te|ju|ti|še|hu|la|li|le|lo|na|no|ni|ne|t|h|o|j|n|m|š)$`),
regexp.MustCompile(`^(.+(a|i|o)sta)(dosmo|doste|doše|nemo|demo|nete|dete|nimo|nite|nila|vši|nem|dem|neš|deš|doh|de|ti|ne|nu|du|la|li|lo|le|t|o)$`),
regexp.MustCompile(`^(.+ta)(smo|ste|jmo|jte|vši|ti|mo|te|ju|še|la|lo|le|li|na|no|ni|ne|n|j|o|m|š|t|h)$`),
regexp.MustCompile(`^(.+inj)(asmo|aste|ati|emo|ete|ali|ala|alo|ale|aše|ahu|em|eš|at|ah|ao)$`),
regexp.MustCompile(`^(.+as)(temo|tete|timo|tite|tući|tem|teš|tao|te|li|ti|la|lo|le)$`),
regexp.MustCompile(`^(.+(elj|ulj|tit|ac|ič|od|oj|et|av|ov)i)(vši|eći|smo|ste|še|mo|te|ti|li|la|lo|le|m|š|t|h|o)$`),
regexp.MustCompile(`^(.+(tit|jeb|ar|ed|uš|ič)i)(jemo|jete|jem|ješ|smo|ste|jmo|jte|vši|mo|še|te|ti|ju|je|la|lo|li|le|t|m|š|h|j|o)$`),
regexp.MustCompile(`^(.+(b|č|d|l|m|p|r|s|š|ž)i)(jemo|jete|jem|ješ|smo|ste|jmo|jte|vši|mo|lu|še|te|ti|ju|je|la|lo|li|le|t|m|š|h|j|o)$`),
regexp.MustCompile(`^(.+luč)(ujete|ujući|ujemo|ujem|uješ|ismo|iste|ujmo|ujte|uje|uju|iše|iti|imo|ite|ila|ilo|ili|ile|ena|eno|eni|ene|uj|io|en|im|iš|it|ih|e|i)$`),
regexp.MustCompile(`^(.+jeti)(smo|ste|še|mo|te|ti|li|la|lo|le|m|š|t|h|o)$`),
regexp.MustCompile(`^(.+e)(lama|lima|lom|lu|li|la|le|lo|l)$`),
regexp.MustCompile(`^(.+i)(lama|lima|lom|lu|li|la|le|lo|l)$`),
regexp.MustCompile(`^(.+at)(ijega|ijemu|ijima|ijeg|ijem|ijih|ijim|ima|oga|ome|omu|iji|ije|ija|iju|oj|og|om|im|ih|a|u|i|e|o|)$`),
regexp.MustCompile(`^(.+et)(avši|ući|emo|imo|em|eš|e|u|i)$`),
regexp.MustCompile(`^(.+)(ajući|alima|alom|avši|asmo|aste|ajmo|ajte|ivši|amo|ate|aju|ati|aše|ahu|ali|ala|ale|alo|ana|ano|ani|ane|am|aš|at|ah|ao|aj|an)$`),
regexp.MustCompile(`^(.+)(anje|enje|anja|enja|enom|enoj|enog|enim|enih|anom|anoj|anog|anim|anih|eno|ovi|ova|oga|ima|ove|enu|anu|ena|ama)$`),
regexp.MustCompile(`^(.+)(nijega|nijemu|nijima|nijeg|nijem|nijim|nijih|nima|niji|nije|nija|niju|noj|nom|nog|nim|nih|an|na|nu|ni|ne|no)$`),
regexp.MustCompile(`^(.+)(om|og|im|ih|em|oj|an|u|o|i|e|a)$`),
}
// highlightVowelRRegex matches an "r" that has none of a, e, i, o, u directly
// before it and none directly after it; the start and the end of the string
// also count as "no vowel".
var highlightVowelRRegex = regexp.MustCompile(`(^|[^aeiou])r($|[^aeiou])`)

// highlightVowelR returns term with every "r" matched by highlightVowelRRegex
// rewritten as an upper-case "R"; the characters around it are kept.
func highlightVowelR(term string) string {
	// The braces are required: "$1R" would be read as a group named "1R".
	const replacement = `${1}R${2}`
	marked := highlightVowelRRegex.ReplaceAllString(term, replacement)
	return marked
}
// hasVowel reports whether term contains one of a, e, i, o, u, or an "r" that
// highlightVowelR marks as "R".
func hasVowel(term string) bool {
	marked := highlightVowelR(term)
	return strings.ContainsAny(marked, "aeiouR")
}
// stem returns the root of term according to stemmingRules.
//
// The rules are tried in order. For each rule that matches, capture group 1
// is the candidate root; it is returned as soon as it is non-empty and
// hasVowel accepts it. If no rule produces such a root, term is returned
// unchanged.
func stem(term string) string {
	for i := range stemmingRules {
		groups := stemmingRules[i].FindStringSubmatch(term)
		if len(groups) > 0 {
			if candidate := groups[1]; hasVowel(candidate) && candidate != "" {
				return candidate
			}
		}
	}
	return term
}
// CroatianStemmerFilter is a token filter without any state; its Filter
// method replaces each token's term with the result of stem.
type CroatianStemmerFilter struct{}

// NewCroatianStemmerFilter returns a pointer to a new CroatianStemmerFilter.
func NewCroatianStemmerFilter() *CroatianStemmerFilter {
	return new(CroatianStemmerFilter)
}
// Filter replaces the Term of every token in input with its stem and returns
// the same stream; the tokens are modified in place.
func (s *CroatianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		root := stem(string(input[i].Term))
		input[i].Term = []byte(root)
	}
	return input
}
// CroatianStemmerFilterConstructor returns a new CroatianStemmerFilter and a
// nil error. config and cache are not consulted.
func CroatianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewCroatianStemmerFilter()
	return filter, nil
}
// init registers CroatianStemmerFilterConstructor under StemmerName and
// panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StemmerName, CroatianStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hr/stop_filter_hr.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Croatian stop-word token filter.
//
// It looks up the token map registered under StopName in cache and wraps it
// in a stop tokens filter. config is not consulted. Any error from the cache
// lookup is returned unchanged together with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hr/stop_words_hr.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used by this package for both the Croatian
// stop-word token map and the stop token filter built from it.
const StopName = "stop_hr"
// CroatianStopWords is the raw Croatian stop-word list, one word per line.
// TokenMapConstructor loads these bytes into a new token map.
var CroatianStopWords = []byte(`biti
jesam
budem
sam
jesi
budeš
si
jesmo
budemo
smo
jeste
budete
ste
jesu
budu
su
bih
bijah
bjeh
bijaše
bi
bje
bješe
bijasmo
bismo
bjesmo
bijaste
biste
bjeste
bijahu
biste
bjeste
bijahu
bi
biše
bjehu
bješe
bio
bili
budimo
budite
bila
bilo
bile
ću
ćeš
će
ćemo
ćete
želim
želiš
želi
želimo
želite
žele
moram
moraš
mora
moramo
morate
moraju
trebam
trebaš
treba
trebamo
trebate
trebaju
mogu
možeš
može
možemo
možete
za
`)
// TokenMapConstructor creates a new token map and loads CroatianStopWords
// into it. config and cache are not consulted. The map is returned together
// with whatever error LoadBytes reported (nil on success).
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(CroatianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hr/suffix_transformation_hr.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hr
import (
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// SuffixTransformationFilterName is the registry name of the Croatian suffix
// transformation token filter.
const SuffixTransformationFilterName = "hr_suffix_transformation_filter"
// SuffixTransformations maps a word ending (key) to the ending that replaces
// it (value). SuffixTransformationFilter.Filter consults this table for every
// token: a term that ends with a key has that ending swapped for the value.
var SuffixTransformations = map[string]string{
"lozi": "loga",
"lozima": "loga",
"pjesi": "pjeh",
"pjesima": "pjeh",
"vojci": "vojka",
"bojci": "bojka",
"jaci": "jak",
"jacima": "jak",
"čajan": "čajni",
"ijeran": "ijerni",
"laran": "larni",
"ijesan": "ijesni",
"anjac": "anjca",
"ajac": "ajca",
"ajaca": "ajca",
"ljaca": "ljca",
"ljac": "ljca",
"ejac": "ejca",
"ejaca": "ejca",
"ojac": "ojca",
"ojaca": "ojca",
"ajaka": "ajka",
"ojaka": "ojka",
"šaca": "šca",
"šac": "šca",
"inzima": "ing",
"inzi": "ing",
"tvenici": "tvenik",
"tetici": "tetika",
"teticima": "tetika",
"nstava": "nstva",
"nicima": "nik",
"ticima": "tik",
"zicima": "zik",
"snici": "snik",
"kuse": "kusi",
"kusan": "kusni",
"kustava": "kustva",
"dušan": "dušni",
"antan": "antni",
"bilan": "bilni",
"tilan": "tilni",
"avilan": "avilni",
"silan": "silni",
"gilan": "gilni",
"rilan": "rilni",
"nilan": "nilni",
"alan": "alni",
"ozan": "ozni",
"rave": "ravi",
"stavan": "stavni",
"pravan": "pravni",
"tivan": "tivni",
"sivan": "sivni",
"atan": "atni",
"cenata": "centa",
"denata": "denta",
"genata": "genta",
"lenata": "lenta",
"menata": "menta",
"jenata": "jenta",
"venata": "venta",
"tetan": "tetni",
"pletan": "pletni",
"šave": "šavi",
"manata": "manta",
"tanata": "tanta",
"lanata": "lanta",
"sanata": "santa",
"ačak": "ačka",
"ačaka": "ačka",
"ušak": "uška",
"atak": "atka",
"ataka": "atka",
"atci": "atka",
"atcima": "atka",
"etak": "etka",
"etaka": "etka",
"itak": "itka",
"itaka": "itka",
"itci": "itka",
"otak": "otka",
"otaka": "otka",
"utak": "utka",
"utaka": "utka",
"utci": "utka",
"utcima": "utka",
"eskan": "eskna",
"tičan": "tični",
"ojsci": "ojska",
"esama": "esma",
"metara": "metra",
"centar": "centra",
"centara": "centra",
"istara": "istra",
"istar": "istra",
"ošću": "osti",
"daba": "dba",
"čcima": "čka",
"čci": "čka",
"mac": "mca",
"maca": "mca",
"voljan": "voljni",
"anaka": "anki",
"vac": "vca",
"vaca": "vca",
"saca": "sca",
"sac": "sca",
"naca": "nca",
"nac": "nca",
"raca": "rca",
"rac": "rca",
"aoca": "alca",
"alaca": "alca",
"alac": "alca",
"elaca": "elca",
"elac": "elca",
"olaca": "olca",
"olac": "olca",
"olce": "olca",
"njac": "njca",
"njaca": "njca",
"ekata": "ekta",
"ekat": "ekta",
"izam": "izma",
"izama": "izma",
"jebe": "jebi",
"ašan": "ašni",
}
// SuffixTransformationFilter is a token filter without any state; its Filter
// method rewrites term endings according to SuffixTransformations.
type SuffixTransformationFilter struct{}

// NewSuffixTransformationFilter returns a pointer to a new
// SuffixTransformationFilter.
func NewSuffixTransformationFilter() *SuffixTransformationFilter {
	return new(SuffixTransformationFilter)
}
// Filter rewrites the ending of every token's Term according to
// SuffixTransformations and returns the same stream; the tokens are modified
// in place.
//
// For each term the longest key of SuffixTransformations that the term ends
// with is replaced by its mapped value. A term that ends with none of the
// keys keeps its text.
func (s *SuffixTransformationFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := string(token.Term)

		// Go does not define the iteration order of a map, so stopping at the
		// first matching key would make the result vary between runs whenever
		// more than one key matches (e.g. "teticima" and "ticima"). Scanning
		// every key and keeping the longest match makes the choice
		// deterministic; two distinct keys of equal length can never both be
		// a suffix of the same term, so there are no ties.
		longest := ""
		for suffix := range SuffixTransformations {
			if len(suffix) > len(longest) && strings.HasSuffix(term, suffix) {
				longest = suffix
			}
		}
		if longest != "" {
			term = term[:len(term)-len(longest)] + SuffixTransformations[longest]
		}

		token.Term = []byte(term)
	}
	return input
}
// SuffixTransformationFilterConstructor returns a new
// SuffixTransformationFilter and a nil error. config and cache are not
// consulted.
func SuffixTransformationFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewSuffixTransformationFilter()
	return filter, nil
}
// init registers SuffixTransformationFilterConstructor under
// SuffixTransformationFilterName and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SuffixTransformationFilterName, SuffixTransformationFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hu/analyzer_hu.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name of the Hungarian analyzer built by
// AnalyzerConstructor.
const AnalyzerName = "hu"
// AnalyzerConstructor assembles the Hungarian analyzer from components held
// in cache: the unicode tokenizer followed, in this order, by the lowercase
// filter, the Hungarian stop filter and the Hungarian Snowball stemmer.
// config is not consulted.
//
// The first lookup that fails aborts construction and its error is returned
// with a nil analyzer.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// The order of the names is the order in which the filters are applied.
	filterNames := []string{
		lowercase.Name,
		StopName,
		SnowballStemmerName,
	}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hu/analyzer_hu_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hu
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestHungarianAnalyzer feeds sample words through the analyzer registered as
// AnalyzerName and compares the produced terms with the expected ones. A
// mismatch in the number of tokens aborts the test; a mismatching term is
// reported and the test continues.
func TestHungarianAnalyzer(t *testing.T) {
	// single builds the expected stream for an input that yields one token.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("babakocsi"), output: single("babakocs")},
		{input: []byte("babakocsijáért"), output: single("babakocs")},
		// stop word
		{input: []byte("által"), output: analysis.TokenStream{}},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if len(got) != len(tc.output) {
			t.Fatalf("expected length: %d, got %d", len(tc.output), len(got))
		}
		for idx := range got {
			want, have := tc.output[idx].Term, got[idx].Term
			if reflect.DeepEqual(have, want) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", want, want, have, have)
		}
	}
}
================================================
FILE: analysis/lang/hu/stemmer_hu.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/hungarian"
)
// SnowballStemmerName is the registry name of the Hungarian Snowball stemmer
// token filter.
const SnowballStemmerName = "stemmer_hu_snowball"
// HungarianStemmerFilter is a token filter without any state; its Filter
// method stems each token's term with the Snowball Hungarian stemmer.
type HungarianStemmerFilter struct{}

// NewHungarianStemmerFilter returns a pointer to a new HungarianStemmerFilter.
func NewHungarianStemmerFilter() *HungarianStemmerFilter {
	return new(HungarianStemmerFilter)
}
// Filter runs hungarian.Stem over the Term of every token in input and stores
// the stemmer's current text back into the token. The same stream is
// returned; the tokens are modified in place.
func (s *HungarianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		// A fresh environment is created per token.
		stemEnv := snowballstem.NewEnv(string(input[i].Term))
		hungarian.Stem(stemEnv)
		input[i].Term = []byte(stemEnv.Current())
	}
	return input
}
// HungarianStemmerFilterConstructor returns a new HungarianStemmerFilter and
// a nil error. config and cache are not consulted.
func HungarianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewHungarianStemmerFilter()
	return filter, nil
}
// init registers HungarianStemmerFilterConstructor under SnowballStemmerName
// and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, HungarianStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hu/stop_filter_hu.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Hungarian stop-word token filter.
//
// It looks up the token map registered under StopName in cache and wraps it
// in a stop tokens filter. config is not consulted. Any error from the cache
// lookup is returned unchanged together with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hu/stop_words_hu.go
================================================
package hu
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used by this package for both the Hungarian
// stop-word token map and the stop token filter built from it.
const StopName = "stop_hu"
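// HungarianStopWords is the raw Hungarian stop-word list, one entry per line.
// Lines beginning with "|" are commentary carried over from the source file.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backquotes (`) were changed to single quotes (') so that the text can be
// stored in a Go raw string literal.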
var HungarianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| Hungarian stop word list
| prepared by Anna Tordai
a
ahogy
ahol
aki
akik
akkor
alatt
által
általában
amely
amelyek
amelyekben
amelyeket
amelyet
amelynek
ami
amit
amolyan
amíg
amikor
át
abban
ahhoz
annak
arra
arról
az
azok
azon
azt
azzal
azért
aztán
azután
azonban
bár
be
belül
benne
cikk
cikkek
cikkeket
csak
de
e
eddig
egész
egy
egyes
egyetlen
egyéb
egyik
egyre
ekkor
el
elég
ellen
elő
először
előtt
első
én
éppen
ebben
ehhez
emilyen
ennek
erre
ez
ezt
ezek
ezen
ezzel
ezért
és
fel
felé
hanem
hiszen
hogy
hogyan
igen
így
illetve
ill.
ill
ilyen
ilyenkor
ison
ismét
itt
jó
jól
jobban
kell
kellett
keresztül
keressünk
ki
kívül
között
közül
legalább
lehet
lehetett
legyen
lenne
lenni
lesz
lett
maga
magát
majd
majd
már
más
másik
meg
még
mellett
mert
mely
melyek
mi
mit
míg
miért
milyen
mikor
minden
mindent
mindenki
mindig
mint
mintha
mivel
most
nagy
nagyobb
nagyon
ne
néha
nekem
neki
nem
néhány
nélkül
nincs
olyan
ott
össze
ő
ők
őket
pedig
persze
rá
s
saját
sem
semmi
sok
sokat
sokkal
számára
szemben
szerint
szinte
talán
tehát
teljes
tovább
továbbá
több
úgy
ugyanis
új
újabb
újra
után
utána
utolsó
vagy
vagyis
valaki
valami
valamint
való
vagyok
van
vannak
volt
voltam
voltak
voltunk
vissza
vele
viszont
volna
`)
// TokenMapConstructor creates a new token map and loads HungarianStopWords
// into it. config and cache are not consulted. The map is returned together
// with whatever error LoadBytes reported (nil on success).
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(HungarianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hy/stop_filter_hy.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package hy
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Armenian stop-word token filter.
//
// It looks up the token map registered under StopName in cache and wraps it
// in a stop tokens filter. config is not consulted. Any error from the cache
// lookup is returned unchanged together with a nil filter.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/hy/stop_words_hy.go
================================================
package hy
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used in package hy for both the Armenian
// stop-word token map and the stop token filter built from it.
const StopName = "stop_hy"
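// ArmenianStopWords is the raw Armenian stop-word list, one word per line,
// preceded by a '#' header line. The content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Any ` in the upstream text was changed to ' so that the list can be written
// as a raw string literal.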
var ArmenianStopWords = []byte(`# example set of Armenian stopwords.
այդ
այլ
այն
այս
դու
դուք
եմ
են
ենք
ես
եք
է
էի
էին
էինք
էիր
էիք
էր
ըստ
թ
ի
ին
իսկ
իր
կամ
համար
հետ
հետո
մենք
մեջ
մի
ն
նա
նաև
նրա
նրանք
որ
որը
որոնք
որպես
ու
ում
պիտի
վրա
և
`)
// TokenMapConstructor creates a fresh token map and fills it from
// ArmenianStopWords. Neither config nor cache is consulted. The map is
// returned even when loading reports an error, alongside that error.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(ArmenianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/id/stop_filter_id.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package id
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor looks up the token map registered as StopName in
// cache and wraps it in a stop token filter. The lookup error, if any, is
// returned unchanged. config is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/id/stop_words_id.go
================================================
package id
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name used in package id for both the Indonesian
// stop-word token map and the stop token filter built from it.
const StopName = "stop_id"
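// IndonesianStopWords is the raw Indonesian stop-word list, one word per
// line, preceded by '#' header lines. The content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// Any ` in the upstream text was changed to ' so that the list can be written
// as a raw string literal.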
var IndonesianStopWords = []byte(`# from appendix D of: A Study of Stemming Effects on Information
# Retrieval in Bahasa Indonesia
ada
adanya
adalah
adapun
agak
agaknya
agar
akan
akankah
akhirnya
aku
akulah
amat
amatlah
anda
andalah
antar
diantaranya
antara
antaranya
diantara
apa
apaan
mengapa
apabila
apakah
apalagi
apatah
atau
ataukah
ataupun
bagai
bagaikan
sebagai
sebagainya
bagaimana
bagaimanapun
sebagaimana
bagaimanakah
bagi
bahkan
bahwa
bahwasanya
sebaliknya
banyak
sebanyak
beberapa
seberapa
begini
beginian
beginikah
beginilah
sebegini
begitu
begitukah
begitulah
begitupun
sebegitu
belum
belumlah
sebelum
sebelumnya
sebenarnya
berapa
berapakah
berapalah
berapapun
betulkah
sebetulnya
biasa
biasanya
bila
bilakah
bisa
bisakah
sebisanya
boleh
bolehkah
bolehlah
buat
bukan
bukankah
bukanlah
bukannya
cuma
percuma
dahulu
dalam
dan
dapat
dari
daripada
dekat
demi
demikian
demikianlah
sedemikian
dengan
depan
di
dia
dialah
dini
diri
dirinya
terdiri
dong
dulu
enggak
enggaknya
entah
entahlah
terhadap
terhadapnya
hal
hampir
hanya
hanyalah
harus
haruslah
harusnya
seharusnya
hendak
hendaklah
hendaknya
hingga
sehingga
ia
ialah
ibarat
ingin
inginkah
inginkan
ini
inikah
inilah
itu
itukah
itulah
jangan
jangankan
janganlah
jika
jikalau
juga
justru
kala
kalau
kalaulah
kalaupun
kalian
kami
kamilah
kamu
kamulah
kan
kapan
kapankah
kapanpun
dikarenakan
karena
karenanya
ke
kecil
kemudian
kenapa
kepada
kepadanya
ketika
seketika
khususnya
kini
kinilah
kiranya
sekiranya
kita
kitalah
kok
lagi
lagian
selagi
lah
lain
lainnya
melainkan
selaku
lalu
melalui
terlalu
lama
lamanya
selama
selama
selamanya
lebih
terlebih
bermacam
macam
semacam
maka
makanya
makin
malah
malahan
mampu
mampukah
mana
manakala
manalagi
masih
masihkah
semasih
masing
mau
maupun
semaunya
memang
mereka
merekalah
meski
meskipun
semula
mungkin
mungkinkah
nah
namun
nanti
nantinya
nyaris
oleh
olehnya
seorang
seseorang
pada
padanya
padahal
paling
sepanjang
pantas
sepantasnya
sepantasnyalah
para
pasti
pastilah
per
pernah
pula
pun
merupakan
rupanya
serupa
saat
saatnya
sesaat
saja
sajalah
saling
bersama
sama
sesama
sambil
sampai
sana
sangat
sangatlah
saya
sayalah
se
sebab
sebabnya
sebuah
tersebut
tersebutlah
sedang
sedangkan
sedikit
sedikitnya
segala
segalanya
segera
sesegera
sejak
sejenak
sekali
sekalian
sekalipun
sesekali
sekaligus
sekarang
sekarang
sekitar
sekitarnya
sela
selain
selalu
seluruh
seluruhnya
semakin
sementara
sempat
semua
semuanya
sendiri
sendirinya
seolah
seperti
sepertinya
sering
seringnya
serta
siapa
siapakah
siapapun
disini
disinilah
sini
sinilah
sesuatu
sesuatunya
suatu
sesudah
sesudahnya
sudah
sudahkah
sudahlah
supaya
tadi
tadinya
tak
tanpa
setelah
telah
tentang
tentu
tentulah
tentunya
tertentu
seterusnya
tapi
tetapi
setiap
tiap
setidaknya
tidak
tidakkah
tidaklah
toh
waduh
wah
wahai
sewaktu
walau
walaupun
wong
yaitu
yakni
yang
`)
// TokenMapConstructor creates a fresh token map and fills it from
// IndonesianStopWords. Neither config nor cache is consulted. The map is
// returned even when loading reports an error, alongside that error.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(IndonesianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/in/indic_normalize.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package in
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// NormalizeName is the registry name under which the Indic normalization
// token filter is registered.
const NormalizeName = "normalize_in"
// IndicNormalizeFilter is a token filter that rewrites each token's term
// through normalize. It has no fields and therefore carries no state.
type IndicNormalizeFilter struct {
}
// NewIndicNormalizeFilter returns a pointer to a new IndicNormalizeFilter.
func NewIndicNormalizeFilter() *IndicNormalizeFilter {
	return new(IndicNormalizeFilter)
}
// Filter replaces the Term of every token in input with its normalized form.
// Tokens are modified in place and the same stream is returned.
func (s *IndicNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		normalized := normalize(bytes.Runes(tok.Term))
		tok.Term = analysis.BuildTermFromRunes(normalized)
	}
	return input
}
// NormalizerFilterConstructor returns a new IndicNormalizeFilter. It never
// fails and consults neither config nor cache.
func NormalizerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewIndicNormalizeFilter()
	return filter, nil
}
// init registers NormalizerFilterConstructor under NormalizeName and panics
// if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(NormalizeName, NormalizerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/in/indic_normalize_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package in
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestIndicNormalizeFilter feeds single-token streams through
// IndicNormalizeFilter and checks that decomposed Indic sequences come back
// in their composed form, and that an empty term is left empty.
func TestIndicNormalizeFilter(t *testing.T) {
	tests := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		// basics
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("अाॅअाॅ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ऑऑ"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("अाॆअाॆ"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ऒऒ"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("अाेअाे"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ओओ"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("अाैअाै"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("औऔ"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("अाअा"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("आआ"),
				},
			},
		},
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("अाैर"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("और"),
				},
			},
		},
		// Bengali khanda ta: TA (U+09A4) + VIRAMA (U+09CD) + ZERO WIDTH JOINER
		// (U+200D). compose only applies this rule when the third rune is the
		// joiner, which is invisible in most editors, so the input is spelled
		// with escapes to keep it from being lost silently.
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("\u09a4\u09cd\u200d"),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte("ৎ"),
				},
			},
		},
		// empty term
		{
			input: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
			output: analysis.TokenStream{
				&analysis.Token{
					Term: []byte(""),
				},
			},
		},
	}

	indicNormalizeFilter := NewIndicNormalizeFilter()
	for _, test := range tests {
		actual := indicNormalizeFilter.Filter(test.input)
		if !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected %#v, got %#v", test.output, actual)
			t.Errorf("expected % x, got % x for % x", test.output[0].Term, actual[0].Term, test.input[0].Term)
			t.Errorf("expected %s, got %s for %s", test.output[0].Term, actual[0].Term, test.input[0].Term)
		}
	}
}
================================================
FILE: analysis/lang/in/scripts.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package in
import (
"unicode"
"github.com/bits-and-blooms/bitset"
"github.com/blevesearch/bleve/v2/analysis"
)
// ScriptData holds the per-script values used by normalize and compose.
type ScriptData struct {
// flag is this script's bit; rows of decompositions OR these bits together
// to say which scripts they apply to.
flag rune
// base is subtracted from a rune to obtain the offset that is compared
// against decompositions, and added back to a row's replacement offset.
base rune
// decompMask has a bit set for every first-rune offset that starts at least
// one decomposition row for this script. It is populated by init.
decompMask *bitset.BitSet
}
// scripts maps each handled Unicode script table to its ScriptData. Every
// script gets a distinct single-bit flag and its own base code point; the
// decompMask field is left nil here and filled in by init.
var scripts = map[*unicode.RangeTable]*ScriptData{
unicode.Devanagari: {
flag: 1,
base: 0x0900,
},
unicode.Bengali: {
flag: 2,
base: 0x0980,
},
unicode.Gurmukhi: {
flag: 4,
base: 0x0A00,
},
unicode.Gujarati: {
flag: 8,
base: 0x0A80,
},
unicode.Oriya: {
flag: 16,
base: 0x0B00,
},
unicode.Tamil: {
flag: 32,
base: 0x0B80,
},
unicode.Telugu: {
flag: 64,
base: 0x0C00,
},
unicode.Kannada: {
flag: 128,
base: 0x0C80,
},
unicode.Malayalam: {
flag: 256,
base: 0x0D00,
},
}
// flag returns the flag bit recorded in scripts for the given script table.
// The table must be a key of scripts; otherwise the lookup yields a nil
// *ScriptData and the field access panics.
func flag(ub *unicode.RangeTable) rune {
	data := scripts[ub]
	return data.flag
}
// decompositions lists the rune sequences that compose collapses into a
// single rune. Each row has five entries; the first four are offsets relative
// to the script's base code point (ScriptData.base):
//
//	[0] first rune of the sequence
//	[1] second rune of the sequence
//	[2] third rune of the sequence; -1 means the sequence has only two
//	    runes, 0xFF stands for U+200D (zero width joiner)
//	[3] replacement rune
//	[4] OR of the flags of every script the row applies to
//
// compose stops at the first row that matches, so a three-rune row must come
// before any two-rune row that shares its first two entries.
var decompositions = [][]rune{
/* devanagari, gujarati vowel candra O */
{0x05, 0x3E, 0x45, 0x11, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari short O */
{0x05, 0x3E, 0x46, 0x12, flag(unicode.Devanagari)},
/* devanagari, gujarati letter O */
{0x05, 0x3E, 0x47, 0x13, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari letter AI, gujarati letter AU */
{0x05, 0x3E, 0x48, 0x14, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari, bengali, gurmukhi, gujarati, oriya AA */
{0x05, 0x3E, -1, 0x06, flag(unicode.Devanagari) | flag(unicode.Bengali) | flag(unicode.Gurmukhi) | flag(unicode.Gujarati) | flag(unicode.Oriya)},
/* devanagari letter candra A */
{0x05, 0x45, -1, 0x72, flag(unicode.Devanagari)},
/* gujarati vowel candra E */
{0x05, 0x45, -1, 0x0D, flag(unicode.Gujarati)},
/* devanagari letter short A */
{0x05, 0x46, -1, 0x04, flag(unicode.Devanagari)},
/* gujarati letter E */
{0x05, 0x47, -1, 0x0F, flag(unicode.Gujarati)},
/* gurmukhi, gujarati letter AI */
{0x05, 0x48, -1, 0x10, flag(unicode.Gurmukhi) | flag(unicode.Gujarati)},
/* devanagari, gujarati vowel candra O */
{0x05, 0x49, -1, 0x11, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari short O */
{0x05, 0x4A, -1, 0x12, flag(unicode.Devanagari)},
/* devanagari, gujarati letter O */
{0x05, 0x4B, -1, 0x13, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari letter AI, gurmukhi letter AU, gujarati letter AU */
{0x05, 0x4C, -1, 0x14, flag(unicode.Devanagari) | flag(unicode.Gurmukhi) | flag(unicode.Gujarati)},
/* devanagari, gujarati vowel candra O */
{0x06, 0x45, -1, 0x11, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari short O */
{0x06, 0x46, -1, 0x12, flag(unicode.Devanagari)},
/* devanagari, gujarati letter O */
{0x06, 0x47, -1, 0x13, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari letter AI, gujarati letter AU */
{0x06, 0x48, -1, 0x14, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* malayalam letter II */
{0x07, 0x57, -1, 0x08, flag(unicode.Malayalam)},
/* devanagari letter UU */
{0x09, 0x41, -1, 0x0A, flag(unicode.Devanagari)},
/* tamil, malayalam letter UU (some styles) */
{0x09, 0x57, -1, 0x0A, flag(unicode.Tamil) | flag(unicode.Malayalam)},
/* malayalam letter AI */
{0x0E, 0x46, -1, 0x10, flag(unicode.Malayalam)},
/* devanagari candra E */
{0x0F, 0x45, -1, 0x0D, flag(unicode.Devanagari)},
/* devanagari short E */
{0x0F, 0x46, -1, 0x0E, flag(unicode.Devanagari)},
/* devanagari AI */
{0x0F, 0x47, -1, 0x10, flag(unicode.Devanagari)},
/* oriya AI */
{0x0F, 0x57, -1, 0x10, flag(unicode.Oriya)},
/* malayalam letter OO */
{0x12, 0x3E, -1, 0x13, flag(unicode.Malayalam)},
/* telugu, kannada letter AU */
{0x12, 0x4C, -1, 0x14, flag(unicode.Telugu) | flag(unicode.Kannada)},
/* telugu letter OO */
{0x12, 0x55, -1, 0x13, flag(unicode.Telugu)},
/* tamil, malayalam letter AU */
{0x12, 0x57, -1, 0x14, flag(unicode.Tamil) | flag(unicode.Malayalam)},
/* oriya letter AU */
{0x13, 0x57, -1, 0x14, flag(unicode.Oriya)},
/* devanagari qa */
{0x15, 0x3C, -1, 0x58, flag(unicode.Devanagari)},
/* devanagari, gurmukhi khha */
{0x16, 0x3C, -1, 0x59, flag(unicode.Devanagari) | flag(unicode.Gurmukhi)},
/* devanagari, gurmukhi ghha */
{0x17, 0x3C, -1, 0x5A, flag(unicode.Devanagari) | flag(unicode.Gurmukhi)},
/* devanagari, gurmukhi za */
{0x1C, 0x3C, -1, 0x5B, flag(unicode.Devanagari) | flag(unicode.Gurmukhi)},
/* devanagari dddha, bengali, oriya rra */
{0x21, 0x3C, -1, 0x5C, flag(unicode.Devanagari) | flag(unicode.Bengali) | flag(unicode.Oriya)},
/* devanagari, bengali, oriya rha */
{0x22, 0x3C, -1, 0x5D, flag(unicode.Devanagari) | flag(unicode.Bengali) | flag(unicode.Oriya)},
/* malayalam chillu nn */
{0x23, 0x4D, 0xFF, 0x7A, flag(unicode.Malayalam)},
/* bengali khanda ta */
{0x24, 0x4D, 0xFF, 0x4E, flag(unicode.Bengali)},
/* devanagari nnna */
{0x28, 0x3C, -1, 0x29, flag(unicode.Devanagari)},
/* malayalam chillu n */
{0x28, 0x4D, 0xFF, 0x7B, flag(unicode.Malayalam)},
/* devanagari, gurmukhi fa */
{0x2B, 0x3C, -1, 0x5E, flag(unicode.Devanagari) | flag(unicode.Gurmukhi)},
/* devanagari, bengali yya */
{0x2F, 0x3C, -1, 0x5F, flag(unicode.Devanagari) | flag(unicode.Bengali)},
/* telugu letter vocalic R */
{0x2C, 0x41, 0x41, 0x0B, flag(unicode.Telugu)},
/* devanagari rra */
{0x30, 0x3C, -1, 0x31, flag(unicode.Devanagari)},
/* malayalam chillu rr */
{0x30, 0x4D, 0xFF, 0x7C, flag(unicode.Malayalam)},
/* malayalam chillu l */
{0x32, 0x4D, 0xFF, 0x7D, flag(unicode.Malayalam)},
/* devanagari llla */
{0x33, 0x3C, -1, 0x34, flag(unicode.Devanagari)},
/* malayalam chillu ll */
{0x33, 0x4D, 0xFF, 0x7E, flag(unicode.Malayalam)},
/* telugu letter MA */
{0x35, 0x41, -1, 0x2E, flag(unicode.Telugu)},
/* devanagari, gujarati vowel sign candra O */
{0x3E, 0x45, -1, 0x49, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari vowel sign short O */
{0x3E, 0x46, -1, 0x4A, flag(unicode.Devanagari)},
/* devanagari, gujarati vowel sign O */
{0x3E, 0x47, -1, 0x4B, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* devanagari, gujarati vowel sign AU */
{0x3E, 0x48, -1, 0x4C, flag(unicode.Devanagari) | flag(unicode.Gujarati)},
/* kannada vowel sign II */
{0x3F, 0x55, -1, 0x40, flag(unicode.Kannada)},
/* gurmukhi vowel sign UU (when stacking) */
{0x41, 0x41, -1, 0x42, flag(unicode.Gurmukhi)},
/* tamil, malayalam vowel sign O */
{0x46, 0x3E, -1, 0x4A, flag(unicode.Tamil) | flag(unicode.Malayalam)},
/* kannada vowel sign OO */
{0x46, 0x42, 0x55, 0x4B, flag(unicode.Kannada)},
/* kannada vowel sign O */
{0x46, 0x42, -1, 0x4A, flag(unicode.Kannada)},
/* malayalam vowel sign AI (if reordered twice) */
{0x46, 0x46, -1, 0x48, flag(unicode.Malayalam)},
/* telugu, kannada vowel sign EE */
{0x46, 0x55, -1, 0x47, flag(unicode.Telugu) | flag(unicode.Kannada)},
/* telugu, kannada vowel sign AI */
{0x46, 0x56, -1, 0x48, flag(unicode.Telugu) | flag(unicode.Kannada)},
/* tamil, malayalam vowel sign AU */
{0x46, 0x57, -1, 0x4C, flag(unicode.Tamil) | flag(unicode.Malayalam)},
/* bengali, oriya vowel sign O, tamil, malayalam vowel sign OO */
{0x47, 0x3E, -1, 0x4B, flag(unicode.Bengali) | flag(unicode.Oriya) | flag(unicode.Tamil) | flag(unicode.Malayalam)},
/* bengali, oriya vowel sign AU */
{0x47, 0x57, -1, 0x4C, flag(unicode.Bengali) | flag(unicode.Oriya)},
/* kannada vowel sign OO */
{0x4A, 0x55, -1, 0x4B, flag(unicode.Kannada)},
/* gurmukhi letter I */
{0x72, 0x3F, -1, 0x07, flag(unicode.Gurmukhi)},
/* gurmukhi letter II */
{0x72, 0x40, -1, 0x08, flag(unicode.Gurmukhi)},
/* gurmukhi letter EE */
{0x72, 0x47, -1, 0x0F, flag(unicode.Gurmukhi)},
/* gurmukhi letter U */
{0x73, 0x41, -1, 0x09, flag(unicode.Gurmukhi)},
/* gurmukhi letter UU */
{0x73, 0x42, -1, 0x0A, flag(unicode.Gurmukhi)},
/* gurmukhi letter OO */
{0x73, 0x4B, -1, 0x13, flag(unicode.Gurmukhi)},
}
// init builds the decompMask of every entry in scripts: for each
// decomposition row whose flags include the script, the bit at the row's
// first-rune offset is set.
func init() {
	for _, data := range scripts {
		mask := bitset.New(0x7d)
		for _, row := range decompositions {
			if row[4]&data.flag == 0 {
				continue // row does not apply to this script
			}
			mask.Set(uint(row[0]))
		}
		data.decompMask = mask
	}
}
// lookupScript returns the key of scripts whose range table contains r, or
// nil when r belongs to none of them.
func lookupScript(r rune) *unicode.RangeTable {
	for table := range scripts {
		if !unicode.Is(table, r) {
			continue
		}
		return table
	}
	return nil
}
// normalize walks input once and, at every rune whose offset within its
// script is marked in that script's decompMask, asks compose to collapse the
// sequence starting there. The slice is edited in place and may come back
// shorter.
func normalize(input []rune) []rune {
	n := len(input)
	for pos := 0; pos < n; pos++ {
		table := lookupScript(input[pos])
		if table == nil {
			continue // rune is not in any handled script
		}
		data := scripts[table]
		offset := input[pos] - data.base
		if !data.decompMask.Test(uint(offset)) {
			continue // no decomposition starts with this rune
		}
		input = compose(offset, table, data, input, pos, n)
		n = len(input)
	}
	return input[:n]
}
// compose tries to replace the sequence starting at input[pos] with a single
// rune, using the first row of decompositions that matches.
//
// ch0 is input[pos] expressed as an offset from scriptData.base, script0 is
// the script table of that rune, and inputLen is the number of valid runes in
// input. The second rune must belong to the same script. A third rune is
// taken into account only if it is U+200D (matched as 0xff) or belongs to the
// same script. On a match input[pos] is overwritten and the consumed runes
// are deleted; otherwise input is returned untouched.
func compose(ch0 rune, script0 *unicode.RangeTable, scriptData *ScriptData, input []rune, pos int, inputLen int) []rune {
	second := pos + 1
	if second >= inputLen {
		return input // need at least 2 characters
	}

	ch1 := input[second] - scriptData.base
	script1 := lookupScript(input[second])
	if script1 != script0 {
		return input // need to be same script
	}

	ch2 := rune(-1)
	if third := pos + 2; third < inputLen {
		switch {
		case input[third] == '\u200D':
			ch2 = 0xff // zero width joiner
		case lookupScript(input[third]) != script1:
			ch2 = -1 // still allow 2 character match
		default:
			ch2 = input[third] - scriptData.base
		}
	}

	for _, row := range decompositions {
		if row[0] != ch0 || row[4]&scriptData.flag == 0 {
			continue
		}
		if row[1] != ch1 {
			continue
		}
		threeRunes := row[2] >= 0
		if threeRunes && row[2] != ch2 {
			continue
		}

		input[pos] = scriptData.base + row[3]
		input = analysis.DeleteRune(input, second)
		if threeRunes {
			// the former third rune has shifted into the second slot
			input = analysis.DeleteRune(input, second)
		}
		return input
	}
	return input
}
================================================
FILE: analysis/lang/it/analyzer_it.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name under which AnalyzerConstructor is
// registered.
const AnalyzerName = "it"
// AnalyzerConstructor assembles the analyzer registered as AnalyzerName. It
// resolves the unicode tokenizer and four token filters from cache and
// returns the first lookup error it encounters. The filters are applied in
// this order: lowercase, elision, stop words, light stemmer. config is not
// consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	elide, err := cache.TokenFilterNamed(ElisionName)
	if err != nil {
		return nil, err
	}

	lower, err := cache.TokenFilterNamed(lowercase.Name)
	if err != nil {
		return nil, err
	}

	stopWords, err := cache.TokenFilterNamed(StopName)
	if err != nil {
		return nil, err
	}

	stemmer, err := cache.TokenFilterNamed(LightStemmerName)
	if err != nil {
		return nil, err
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    unicodeTokenizer,
		TokenFilters: []analysis.TokenFilter{lower, elide, stopWords, stemmer},
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/it/analyzer_it_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestItalianAnalyzer runs the analyzer registered as AnalyzerName over a few
// inputs and compares the resulting terms, covering stemming, stop-word
// removal and elided contractions.
func TestItalianAnalyzer(t *testing.T) {
	type analyzerCase struct {
		input  []byte
		output analysis.TokenStream
	}
	term := func(s string) *analysis.Token {
		return &analysis.Token{Term: []byte(s)}
	}

	cases := []analyzerCase{
		// stemming
		{input: []byte("abbandonata"), output: analysis.TokenStream{term("abbandonat")}},
		{input: []byte("abbandonati"), output: analysis.TokenStream{term("abbandonat")}},
		// stop word
		{input: []byte("dallo"), output: analysis.TokenStream{}},
		// contractions
		{input: []byte("dell'Italia"), output: analysis.TokenStream{term("ital")}},
		{input: []byte("l'Italiano"), output: analysis.TokenStream{term("italian")}},
		// test for bug #218
		{input: []byte("Nell'anfora"), output: analysis.TokenStream{term("anfor")}},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if len(got) != len(tc.output) {
			t.Fatalf("expected length: %d, got %d", len(tc.output), len(got))
		}
		for i := range got {
			want := tc.output[i].Term
			if !reflect.DeepEqual(got[i].Term, want) {
				t.Errorf("expected term %s (% x) got %s (% x)", want, want, got[i].Term, got[i].Term)
			}
		}
	}
}
================================================
FILE: analysis/lang/it/articles_it.go
================================================
package it
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// ArticlesName is the registry name of the token map built from
// ItalianArticles.
const ArticlesName = "articles_it"
// ItalianArticles is the raw list, one entry per line, that
// ArticlesTokenMapConstructor loads into the token map registered as
// ArticlesName.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis
var ItalianArticles = []byte(`
c
l
all
dall
dell
nell
sull
coll
pell
gl
agl
dagl
degl
negl
sugl
un
m
t
s
v
d
`)
// ArticlesTokenMapConstructor creates a fresh token map and fills it from
// ItalianArticles. Neither config nor cache is consulted. The map is returned
// even when loading reports an error, alongside that error.
func ArticlesTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	articles := analysis.NewTokenMap()
	if err := articles.LoadBytes(ItalianArticles); err != nil {
		return articles, err
	}
	return articles, nil
}
// init registers ArticlesTokenMapConstructor under ArticlesName and panics if
// the registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(ArticlesName, ArticlesTokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/it/elision_it.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/elision"
"github.com/blevesearch/bleve/v2/registry"
)
// ElisionName is the registry name under which ElisionFilterConstructor is
// registered.
const ElisionName = "elision_it"
// ElisionFilterConstructor builds an elision filter from the token map
// registered as ArticlesName. config is not consulted.
//
// If the token map cannot be resolved, the returned error wraps the lookup
// error with %w, so the message is unchanged while callers can still reach
// the underlying cause through errors.Is / errors.As.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	articlesTokenMap, err := cache.TokenMapNamed(ArticlesName)
	if err != nil {
		return nil, fmt.Errorf("error building elision filter: %w", err)
	}
	return elision.NewElisionFilter(articlesTokenMap), nil
}
// init registers ElisionFilterConstructor under ElisionName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(ElisionName, ElisionFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/it/elision_it_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestItalianElision checks that the filter registered as ElisionName strips
// the elided prefix from "dell'Italia".
func TestItalianElision(t *testing.T) {
	stream := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: stream("dell'Italia"), output: stream("Italia")},
	}

	elisionFilter, err := registry.NewCache().TokenFilterNamed(ElisionName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := elisionFilter.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/it/light_stemmer_it.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// LightStemmerName is the registry name under which
// ItalianLightStemmerFilterConstructor is registered.
const LightStemmerName = "stemmer_it_light"
// ItalianLightStemmerFilter is a token filter that rewrites each token's term
// through stem. It has no fields and therefore carries no state.
type ItalianLightStemmerFilter struct {
}
// NewItalianLightStemmerFilterFilter returns a pointer to a new
// ItalianLightStemmerFilter.
func NewItalianLightStemmerFilterFilter() *ItalianLightStemmerFilter {
	return new(ItalianLightStemmerFilter)
}
// Filter replaces the Term of every token in input with its stemmed form.
// Tokens are modified in place and the same stream is returned.
func (s *ItalianLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		stemmed := stem(bytes.Runes(tok.Term))
		tok.Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// stem applies the light Italian stemming rules to input and returns the
// resulting prefix of the same slice.
//
// Words shorter than six runes are returned untouched. Otherwise the accented
// vowels listed below are folded to their plain letter in place, and then a
// final vowel is stripped:
//
//   - final 'e' or 'i': drop two runes when preceded by 'i' or 'h', else one
//   - final 'a' or 'o': drop two runes when preceded by 'i', else one
//
// Any other final rune leaves the length unchanged.
func stem(input []rune) []rune {
	n := len(input)
	if n < 6 {
		return input
	}

	for i, r := range input {
		switch r {
		case 'à', 'á', 'â', 'ä':
			input[i] = 'a'
		case 'ò', 'ó', 'ô', 'ö':
			input[i] = 'o'
		case 'è', 'é', 'ê', 'ë':
			input[i] = 'e'
		case 'ù', 'ú', 'û', 'ü':
			input[i] = 'u'
		case 'ì', 'í', 'î', 'ï':
			input[i] = 'i'
		}
	}

	// Read the ending after folding so that accented endings are handled too.
	last, prev := input[n-1], input[n-2]
	drop := 0
	switch last {
	case 'e', 'i':
		drop = 1
		if prev == 'i' || prev == 'h' {
			drop = 2
		}
	case 'a', 'o':
		drop = 1
		if prev == 'i' {
			drop = 2
		}
	}
	return input[:n-drop]
}
// ItalianLightStemmerFilterConstructor returns a new
// ItalianLightStemmerFilter. It never fails and consults neither config nor
// cache.
func ItalianLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewItalianLightStemmerFilterFilter()
	return filter, nil
}
// init registers ItalianLightStemmerFilterConstructor under LightStemmerName
// and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(LightStemmerName, ItalianLightStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/it/light_stemmer_it_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestItalianLightStemmer checks that the filter registered as
// LightStemmerName reduces "ragazzo" and "ragazzi" to the same stem.
func TestItalianLightStemmer(t *testing.T) {
	stream := func(term string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(term)}}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: stream("ragazzo"), output: stream("ragazz")},
		{input: stream("ragazzi"), output: stream("ragazz")},
	}

	filter, err := registry.NewCache().TokenFilterNamed(LightStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := filter.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/it/stemmer_it_snowball.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/italian"
)
// SnowballStemmerName is the registry name under which the Italian Snowball
// stemmer token filter is registered.
const SnowballStemmerName = "stemmer_it_snowball"
// ItalianStemmerFilter is a token filter that stems terms with the Italian
// Snowball stemmer. It has no fields.
type ItalianStemmerFilter struct{}
// NewItalianStemmerFilter returns a pointer to a new ItalianStemmerFilter.
func NewItalianStemmerFilter() *ItalianStemmerFilter {
	return new(ItalianStemmerFilter)
}
// Filter replaces the Term of every token in input with the result of running
// the Italian Snowball stemmer over it. Tokens are modified in place and the
// same stream is returned.
func (s *ItalianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		stemEnv := snowballstem.NewEnv(string(tok.Term))
		italian.Stem(stemEnv)
		tok.Term = []byte(stemEnv.Current())
	}
	return input
}
// ItalianStemmerFilterConstructor is the registry constructor for the Italian
// Snowball stemmer filter. Neither config nor cache is consulted, and the
// returned error is always nil.
func ItalianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewItalianStemmerFilter()
	return stemmer, nil
}
// init registers ItalianStemmerFilterConstructor under SnowballStemmerName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, ItalianStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/it/stemmer_it_snowball_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSnowballItalianStemmer looks up the filter registered as
// SnowballStemmerName and checks that several inflections of the same verb
// are all stemmed to "aizz".
func TestSnowballItalianStemmer(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: single("aizzata"), output: single("aizz")},
		{input: single("aizzargli"), output: single("aizz")},
		{input: single("aizzasse"), output: single("aizz")},
	}

	stemmer, err := registry.NewCache().TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := stemmer.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/it/stop_filter_it.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package it
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Italian stop word filter. It fetches
// the token map named StopName from cache and wraps it in a stop tokens
// filter; a lookup failure is returned unchanged. config is not used.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor as the token filter named
// StopName and panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/it/stop_words_it.go
================================================
package it
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name shared by the Italian stop word token map and
// the stop token filter built from it.
const StopName = "stop_it"
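// ItalianStopWords is the Italian stop word list in Snowball format: one stop
// word at the start of each line, with comments introduced by a vertical bar.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be stored in a raw string literal.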
var ItalianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| An Italian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
ad | a (to) before vowel
al | a + il
allo | a + lo
ai | a + i
agli | a + gli
all | a + l'
agl | a + gl'
alla | a + la
alle | a + le
con | with
col | con + il
coi | con + i (forms collo, cogli etc are now very rare)
da | from
dal | da + il
dallo | da + lo
dai | da + i
dagli | da + gli
dall | da + l'
dagl | da + gll'
dalla | da + la
dalle | da + le
di | of
del | di + il
dello | di + lo
dei | di + i
degli | di + gli
dell | di + l'
degl | di + gl'
della | di + la
delle | di + le
in | in
nel | in + el
nello | in + lo
nei | in + i
negli | in + gli
nell | in + l'
negl | in + gl'
nella | in + la
nelle | in + le
su | on
sul | su + il
sullo | su + lo
sui | su + i
sugli | su + gli
sull | su + l'
sugl | su + gl'
sulla | su + la
sulle | su + le
per | through, by
tra | among
contro | against
io | I
tu | thou
lui | he
lei | she
noi | we
voi | you
loro | they
mio | my
mia |
miei |
mie |
tuo |
tua |
tuoi | thy
tue |
suo |
sua |
suoi | his, her
sue |
nostro | our
nostra |
nostri |
nostre |
vostro | your
vostra |
vostri |
vostre |
mi | me
ti | thee
ci | us, there
vi | you, there
lo | him, the
la | her, the
li | them
le | them, the
gli | to him, the
ne | from there etc
il | the
un | a
uno | a
una | a
ma | but
ed | and
se | if
perché | why, because
anche | also
come | how
dov | where (as dov')
dove | where
che | who, that
chi | who
cui | whom
non | not
più | more
quale | who, that
quanto | how much
quanti |
quanta |
quante |
quello | that
quelli |
quella |
quelle |
questo | this
questi |
questa |
queste |
si | yes
tutto | all
tutti | all
| single letter forms:
a | at
c | as c' for ce or ci
e | and
i | the
l | as l'
o | or
| forms of avere, to have (not including the infinitive):
ho
hai
ha
abbiamo
avete
hanno
abbia
abbiate
abbiano
avrò
avrai
avrà
avremo
avrete
avranno
avrei
avresti
avrebbe
avremmo
avreste
avrebbero
avevo
avevi
aveva
avevamo
avevate
avevano
ebbi
avesti
ebbe
avemmo
aveste
ebbero
avessi
avesse
avessimo
avessero
avendo
avuto
avuta
avuti
avute
| forms of essere, to be (not including the infinitive):
sono
sei
è
siamo
siete
sia
siate
siano
sarò
sarai
sarà
saremo
sarete
saranno
sarei
saresti
sarebbe
saremmo
sareste
sarebbero
ero
eri
era
eravamo
eravate
erano
fui
fosti
fu
fummo
foste
furono
fossi
fosse
fossimo
fossero
essendo
| forms of fare, to do (not including the infinitive, fa, fat-):
faccio
fai
facciamo
fanno
faccia
facciate
facciano
farò
farai
farà
faremo
farete
faranno
farei
faresti
farebbe
faremmo
fareste
farebbero
facevo
facevi
faceva
facevamo
facevate
facevano
feci
facesti
fece
facemmo
faceste
fecero
facessi
facesse
facessimo
facessero
facendo
| forms of stare, to be (not including the infinitive):
sto
stai
sta
stiamo
stanno
stia
stiate
stiano
starò
starai
starà
staremo
starete
staranno
starei
staresti
starebbe
staremmo
stareste
starebbero
stavo
stavi
stava
stavamo
stavate
stavano
stetti
stesti
stette
stemmo
steste
stettero
stessi
stesse
stessimo
stessero
stando
`)
// TokenMapConstructor creates a new token map and loads ItalianStopWords into
// it. The map is returned together with whatever error the load reported.
// Neither config nor cache is used.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(ItalianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor as the token map named StopName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/nl/analyzer_nl.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name under which the Dutch analyzer is
// registered.
const AnalyzerName = "nl"
// AnalyzerConstructor assembles the Dutch analyzer from components held in
// cache: the unicode tokenizer followed by the lowercase, Dutch stop word and
// Dutch Snowball stemmer filters, configured in that order. The first lookup
// error is returned unchanged. config is not used.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are placed in the analyzer.
	filterNames := []string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor as the analyzer named AnalyzerName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/nl/analyzer_nl_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nl
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestDutchAnalyzer runs the analyzer registered as AnalyzerName over a few
// inputs and compares the produced terms with the expected ones. It covers
// stemming of two related words and removal of a stop word.
func TestDutchAnalyzer(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("lichamelijk"), output: single("licham")},
		{input: []byte("lichamelijke"), output: single("licham")},
		// stop word
		{input: []byte("van"), output: analysis.TokenStream{}},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if want := len(tc.output); len(got) != want {
			t.Fatalf("expected length: %d, got %d", want, len(got))
		}
		for i := range got {
			gotTerm, wantTerm := got[i].Term, tc.output[i].Term
			if !reflect.DeepEqual(gotTerm, wantTerm) {
				t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
			}
		}
	}
}
================================================
FILE: analysis/lang/nl/stemmer_nl.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/dutch"
)
// SnowballStemmerName is the registry name under which the Dutch Snowball
// stemmer token filter is registered.
const SnowballStemmerName = "stemmer_nl_snowball"
// DutchStemmerFilter is a token filter that stems terms with the Dutch
// Snowball stemmer. It has no fields.
type DutchStemmerFilter struct{}
// NewDutchStemmerFilter returns a pointer to a new DutchStemmerFilter.
func NewDutchStemmerFilter() *DutchStemmerFilter {
	return new(DutchStemmerFilter)
}
// Filter replaces the Term of every token in input with the result of running
// the Dutch Snowball stemmer over it. Tokens are modified in place and the
// same stream is returned.
func (s *DutchStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		stemEnv := snowballstem.NewEnv(string(tok.Term))
		dutch.Stem(stemEnv)
		tok.Term = []byte(stemEnv.Current())
	}
	return input
}
// DutchStemmerFilterConstructor is the registry constructor for the Dutch
// Snowball stemmer filter. Neither config nor cache is consulted, and the
// returned error is always nil.
func DutchStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewDutchStemmerFilter()
	return stemmer, nil
}
// init registers DutchStemmerFilterConstructor under SnowballStemmerName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, DutchStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/nl/stop_filter_nl.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package nl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Dutch stop word filter. It fetches the
// token map named StopName from cache and wraps it in a stop tokens filter; a
// lookup failure is returned unchanged. config is not used.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor as the token filter named
// StopName and panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/nl/stop_words_nl.go
================================================
package nl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name shared by the Dutch stop word token map and
// the stop token filter built from it.
const StopName = "stop_nl"
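// DutchStopWords is the Dutch stop word list in Snowball format: one stop
// word at the start of each line, with comments introduced by a vertical bar.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be stored in a raw string literal.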
var DutchStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Dutch stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large sample of Dutch text.
| Dutch stop words frequently exhibit homonym clashes. These are indicated
| clearly below.
de | the
en | and
van | of, from
ik | I, the ego
te | (1) chez, at etc, (2) to, (3) too
dat | that, which
die | that, those, who, which
in | in, inside
een | a, an, one
hij | he
het | the, it
niet | not, nothing, naught
zijn | (1) to be, being, (2) his, one's, its
is | is
was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
op | on, upon, at, in, up, used up
aan | on, upon, to (as dative)
met | with, by
als | like, such as, when
voor | (1) before, in front of, (2) furrow
had | had, past tense all persons sing. of 'hebben' (have)
er | there
maar | but, only
om | round, about, for etc
hem | him
dan | then
zou | should/would, past tense all persons sing. of 'zullen'
of | or, whether, if
wat | what, something, anything
mijn | possessive and noun 'mine'
men | people, 'one'
dit | this
zo | so, thus, in this way
door | through by
over | over, across
ze | she, her, they, them
zich | oneself
bij | (1) a bee, (2) by, near, at
ook | also, too
tot | till, until
je | you
mij | me
uit | out of, from
der | Old Dutch form of 'van der' still found in surnames
daar | (1) there, (2) because
haar | (1) her, their, them, (2) hair
naar | (1) unpleasant, unwell etc, (2) towards, (3) as
heb | present first person sing. of 'to have'
hoe | how, why
heeft | present third person sing. of 'to have'
hebben | 'to have' and various parts thereof
deze | this
u | you
want | (1) for, (2) mitten, (3) rigging
nog | yet, still
zal | 'shall', first and third person sing. of verb 'zullen' (will)
me | me
zij | she, they
nu | now
ge | 'thou', still used in Belgium and south Netherlands
geen | none
omdat | because
iets | something, somewhat
worden | to become, grow, get
toch | yet, still
al | all, every, each
waren | (1) 'were' (2) to wander, (3) wares, (3)
veel | much, many
meer | (1) more, (2) lake
doen | to do, to make
toen | then, when
moet | noun 'spot/mote' and present form of 'to must'
ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
zonder | without
kan | noun 'can' and present form of 'to be able'
hun | their, them
dus | so, consequently
alles | all, everything, anything
onder | under, beneath
ja | yes, of course
eens | once, one day
hier | here
wie | who
werd | imperfect third person sing. of 'become'
altijd | always
doch | yet, but etc
wordt | present third person sing. of 'become'
wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
kunnen | to be able
ons | us/our
zelf | self
tegen | against, towards, at
na | after, near
reeds | already
wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
kon | could; past tense of 'to be able'
niets | nothing
uw | your
iemand | somebody
geweest | been; past participle of 'be'
andere | other
`)
// TokenMapConstructor creates a new token map and loads DutchStopWords into
// it. The map is returned together with whatever error the load reported.
// Neither config nor cache is used.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(DutchStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor as the token map named StopName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/no/analyzer_no.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package no
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name under which the Norwegian analyzer is
// registered.
const AnalyzerName = "no"
// AnalyzerConstructor assembles the Norwegian analyzer from components held
// in cache: the unicode tokenizer followed by the lowercase, Norwegian stop
// word and Norwegian Snowball stemmer filters, configured in that order. The
// first lookup error is returned unchanged. config is not used.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are placed in the analyzer.
	filterNames := []string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor as the analyzer named AnalyzerName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/no/analyzer_no_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package no
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestNorwegianAnalyzer runs the analyzer registered as AnalyzerName over a
// few inputs and compares the produced terms with the expected ones. It
// covers stemming of two related words and removal of a stop word.
func TestNorwegianAnalyzer(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("havnedistriktene"), output: single("havnedistrikt")},
		{input: []byte("havnedistrikter"), output: single("havnedistrikt")},
		// stop word
		{input: []byte("det"), output: analysis.TokenStream{}},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if want := len(tc.output); len(got) != want {
			t.Fatalf("expected length: %d, got %d", want, len(got))
		}
		for i := range got {
			gotTerm, wantTerm := got[i].Term, tc.output[i].Term
			if !reflect.DeepEqual(gotTerm, wantTerm) {
				t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
			}
		}
	}
}
================================================
FILE: analysis/lang/no/stemmer_no.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package no
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/norwegian"
)
// SnowballStemmerName is the registry name under which the Norwegian Snowball
// stemmer token filter is registered.
const SnowballStemmerName = "stemmer_no_snowball"
// NorwegianStemmerFilter is a token filter that stems terms with the
// Norwegian Snowball stemmer. It has no fields.
type NorwegianStemmerFilter struct{}
// NewNorwegianStemmerFilter returns a pointer to a new NorwegianStemmerFilter.
func NewNorwegianStemmerFilter() *NorwegianStemmerFilter {
	return new(NorwegianStemmerFilter)
}
// Filter replaces the Term of every token in input with the result of running
// the Norwegian Snowball stemmer over it. Tokens are modified in place and the
// same stream is returned.
func (s *NorwegianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		stemEnv := snowballstem.NewEnv(string(tok.Term))
		norwegian.Stem(stemEnv)
		tok.Term = []byte(stemEnv.Current())
	}
	return input
}
// NorwegianStemmerFilterConstructor is the registry constructor for the
// Norwegian Snowball stemmer filter. Neither config nor cache is consulted,
// and the returned error is always nil.
func NorwegianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewNorwegianStemmerFilter()
	return stemmer, nil
}
// init registers NorwegianStemmerFilterConstructor under SnowballStemmerName
// and panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, NorwegianStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/no/stop_filter_no.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package no
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Norwegian stop word filter. It fetches
// the token map named StopName from cache and wraps it in a stop tokens
// filter; a lookup failure is returned unchanged. config is not used.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, err := cache.TokenMapNamed(StopName)
	if err != nil {
		return nil, err
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor as the token filter named
// StopName and panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/no/stop_words_no.go
================================================
package no
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the registry name shared by the Norwegian stop word token map
// and the stop token filter built from it.
const StopName = "stop_no"
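// NorwegianStopWords is the Norwegian stop word list in Snowball format: one
// stop word at the start of each line, with comments introduced by a vertical
// bar.
//
// This content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// Backticks (`) in the original were changed to apostrophes (') so the text
// can be stored in a raw string literal.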
var NorwegianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Norwegian stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This stop word list is for the dominant bokmål dialect. Words unique
| to nynorsk are marked *.
| Revised by Jan Bruusgaard , Jan 2005
og | and
i | in
jeg | I
det | it/this/that
at | to (w. inf.)
en | a/an
et | a/an
den | it/this/that
til | to
er | is/am/are
som | who/that
på | on
de | they / you(formal)
med | with
han | he
av | of
ikke | not
ikkje | not *
der | there
så | so
var | was/were
meg | me
seg | you
men | but
ett | one
har | have
om | about
vi | we
min | my
mitt | my
ha | have
hadde | had
hun | she
nå | now
over | over
da | when/as
ved | by/know
fra | from
du | you
ut | out
sin | your
dem | them
oss | us
opp | up
man | you/one
kan | can
hans | his
hvor | where
eller | or
hva | what
skal | shall/must
selv | self (reflective)
sjøl | self (reflective)
her | here
alle | all
vil | will
bli | become
ble | became
blei | became *
blitt | have become
kunne | could
inn | in
når | when
være | be
kom | come
noen | some
noe | some
ville | would
dere | you
som | who/which/that
deres | their/theirs
kun | only/just
ja | yes
etter | after
ned | down
skulle | should
denne | this
for | for/because
deg | you
si | hers/his
sine | hers/his
sitt | hers/his
mot | against
å | to
meget | much
hvorfor | why
dette | this
disse | these/those
uten | without
hvordan | how
ingen | none
din | your
ditt | your
blir | become
samme | same
hvilken | which
hvilke | which (plural)
sånn | such a
inni | inside/within
mellom | between
vår | our
hver | each
hvem | who
vors | us/ours
hvis | whose
både | both
bare | only/just
enn | than
fordi | as/because
før | before
mange | many
også | also
slik | just
vært | been
være | to be
båe | both *
begge | both
siden | since
dykk | your *
dykkar | yours *
dei | they *
deira | them *
deires | theirs *
deim | them *
di | your (fem.) *
då | as/when *
eg | I *
ein | a/an *
eit | a/an *
eitt | a/an *
elles | or *
honom | he *
hjå | at *
ho | she *
hoe | she *
henne | her
hennar | her/hers
hennes | hers
hoss | how *
hossen | how *
ikkje | not *
ingi | noone *
inkje | noone *
korleis | how *
korso | how *
kva | what/which *
kvar | where *
kvarhelst | where *
kven | who/whom *
kvi | why *
kvifor | why *
me | we *
medan | while *
mi | my *
mine | my *
mykje | much *
no | now *
nokon | some (masc./neut.) *
noka | some (fem.) *
nokor | some *
noko | some *
nokre | some *
si | his/hers *
sia | since *
sidan | since *
so | so *
somt | some *
somme | some *
um | about*
upp | up *
vere | be *
vore | was *
verte | become *
vort | become *
varte | became *
vart | became *
`)
// TokenMapConstructor creates a new token map and loads NorwegianStopWords
// into it. The map is returned together with whatever error the load
// reported. Neither config nor cache is used.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(NorwegianStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor as the token map named StopName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pl/analyzer_pl.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the registry name under which the Polish analyzer is
// registered.
const AnalyzerName = "pl"
// AnalyzerConstructor assembles the Polish analyzer from components held in
// cache: the unicode tokenizer followed by the lowercase filter and the
// filters registered as StopName and SnowballStemmerName, configured in that
// order. The first lookup error is returned unchanged. config is not used.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are placed in the analyzer.
	filterNames := []string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    unicodeTokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor as the analyzer named AnalyzerName and
// panics if the registration reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pl/analyzer_pl_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pl
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestPolishAnalyzer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
// stemming
{
input: []byte("śmiało"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("śmieć"),
},
},
},
{
input: []byte("przypadku"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("przypadek"),
},
},
},
// stop word
{
input: []byte("według"),
output: analysis.TokenStream{},
},
// digits safe
{
input: []byte("text 1000"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("text"),
},
&analysis.Token{
Term: []byte("1000"),
},
},
},
{
input: []byte("badawczego było opracowanie kompendium które przystępny sposób prezentowało niespecjalistom zakresu kryptografii kwantowej wykorzystanie technik kwantowych do bezpiecznego przesyłu przetwarzania informacji"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("badawczy"),
},
&analysis.Token{
Term: []byte("opracować"),
},
&analysis.Token{
Term: []byte("kompendium"),
},
&analysis.Token{
Term: []byte("przystyć"),
},
&analysis.Token{
Term: []byte("prezentować"),
},
&analysis.Token{
Term: []byte("niespecjalista"),
},
&analysis.Token{
Term: []byte("zakres"),
},
&analysis.Token{
Term: []byte("kryptografia"),
},
&analysis.Token{
Term: []byte("kwantowy"),
},
&analysis.Token{
Term: []byte("wykorzyseć"),
},
&analysis.Token{
Term: []byte("technika"),
},
&analysis.Token{
Term: []byte("kwantowy"),
},
&analysis.Token{
Term: []byte("bezpieczny"),
},
&analysis.Token{
Term: []byte("przesył"),
},
&analysis.Token{
Term: []byte("przetwarzać"),
},
&analysis.Token{
Term: []byte("informacja"),
},
},
},
{
input: []byte("Ale ta wiedza była utrzymywana w tajemnicy"),
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("wiedza"),
},
&analysis.Token{
Term: []byte("utrzymywać"),
},
&analysis.Token{
Term: []byte("tajemnik"),
},
},
},
}
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(AnalyzerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := analyzer.Analyze(test.input)
if len(actual) != len(test.output) {
t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
}
for i, tok := range actual {
if !reflect.DeepEqual(tok.Term, test.output[i].Term) {
t.Errorf("expected term %s (% x) got %s (% x)", test.output[i].Term, test.output[i].Term, tok.Term, tok.Term)
}
}
}
}
================================================
FILE: analysis/lang/pl/stemmer_pl.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/lang/pl/stempel"
"github.com/blevesearch/bleve/v2/registry"
)
// SnowballStemmerName is the name under which this package registers its
// stemmer token filter.
// NOTE(review): despite the identifier, the filter registered under this name
// is PolishStemmerFilter, which is driven by the stempel trie.
const SnowballStemmerName = "stemmer_pl"
// PolishStemmerFilter is a token filter that rewrites each token's term by
// applying the diff command a stempel trie returns for that term.
type PolishStemmerFilter struct {
	// trie is queried once per token for the diff command to apply.
	trie stempel.Trie
}
// NewPolishStemmerFilter creates a PolishStemmerFilter backed by the trie
// returned from stempel.LoadTrie. A load failure is returned unchanged.
func NewPolishStemmerFilter() (*PolishStemmerFilter, error) {
	loaded, err := stempel.LoadTrie()
	if err != nil {
		return nil, err
	}
	filter := &PolishStemmerFilter{trie: loaded}
	return filter, nil
}
// Filter rewrites the Term of every token in input: the term is decoded to
// runes, the trie is asked for the matching diff command, and the result of
// applying that command replaces the term. Tokens are modified in place and
// the same stream is returned.
func (s *PolishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		word := []rune(string(tok.Term))
		patch := s.trie.GetLastOnPath(word)
		stemmed := stempel.Diff(word, patch)
		tok.Term = []byte(string(stemmed))
	}
	return input
}
// PolishStemmerFilterConstructor is the registry constructor for the stemmer
// filter. Neither config nor cache is used; it simply forwards the result of
// NewPolishStemmerFilter.
func PolishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter, err := NewPolishStemmerFilter()
	return filter, err
}
// init registers PolishStemmerFilterConstructor under SnowballStemmerName and
// panics if the registry rejects the registration.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, PolishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pl/stemmer_pl_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pl
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestPolishStemmer feeds single-token streams through the registered stemmer
// filter and compares the resulting stream with the expected one.
func TestPolishStemmer(t *testing.T) {
	// single wraps one term in a one-token stream.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: single("utrzymywana"), output: single("utrzymywać")},
		{input: single("tajemnicy"), output: single("tajemnik")},
	}

	stemmer, err := registry.NewCache().TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := stemmer.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/pl/stempel/LICENSE
================================================
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
================================================
FILE: analysis/lang/pl/stempel/cell.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"fmt"
"github.com/blevesearch/stempel/javadata"
)
// cell holds the two values newCell keeps from a serialized cell record; the
// record's other two values (cnt and skip) are read but not stored.
type cell struct {
	ref int32 // third int32 of the serialized record
	cmd int32 // first int32 of the serialized record
}
// String renders the cell as "ref(<ref>) cmd(<cmd>)".
func (c *cell) String() string {
	const layout = "ref(%d) cmd(%d)"
	return fmt.Sprintf(layout, c.ref, c.cmd)
}
// newCell decodes one cell from r. A serialized cell is four consecutive
// int32 values in the order cmd, cnt, ref, skip; cnt and skip are consumed to
// keep the stream aligned but are not retained.
//
// Read failures are wrapped with %w so callers can still match the underlying
// cause (for example io.EOF) with errors.Is; the message text is unchanged.
func newCell(r *javadata.Reader) (*cell, error) {
	cmd, err := r.ReadInt32()
	if err != nil {
		return nil, fmt.Errorf("error reading cell cmd: %w", err)
	}
	// cnt is part of the record layout but unused here
	if _, err = r.ReadInt32(); err != nil {
		return nil, fmt.Errorf("error reading cell cnt: %w", err)
	}
	ref, err := r.ReadInt32()
	if err != nil {
		return nil, fmt.Errorf("error reading cell ref: %w", err)
	}
	// skip is part of the record layout but unused here
	if _, err = r.ReadInt32(); err != nil {
		return nil, fmt.Errorf("error reading cell skip: %w", err)
	}
	return &cell{
		cmd: cmd,
		ref: ref,
	}, nil
}
================================================
FILE: analysis/lang/pl/stempel/diff.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
// Diff transforms the dest rune slice following the rules described
// in the diff command rune slice, and returns the transformed slice.
//
// diff is read as consecutive (command, parameter) rune pairs; a trailing
// unpaired rune is ignored. A cursor starts on the last rune of dest and moves
// one position towards the front after every pair. Numeric parameters are
// letter-encoded: 'a' is 1, 'b' is 2, and so on.
//
//	'-' n   skip: move the cursor n positions towards the front
//	'R' c   replace the rune under the cursor with c
//	'D' n   delete the n runes that end at the cursor
//	'I' c   insert c immediately after the cursor
//
// Any other command rune only moves the cursor. If a command would touch a
// position outside the buffer, processing stops and the buffer as it is at
// that point is returned. An empty dest or diff returns dest unchanged.
//
// dest may be modified in place; callers must use the returned slice.
func Diff(dest, diff []rune) []rune {
	if len(diff) == 0 {
		return dest
	}

	pos := len(dest) - 1
	if pos < 0 {
		return dest
	}

	for i := 0; i < len(diff)/2; i++ {
		cmd := diff[2*i]
		param := diff[2*i+1]
		parNum := int(param - 'a' + 1)
		switch cmd {
		case '-':
			pos = pos - parNum + 1
		case 'R':
			if pos < 0 || pos >= len(dest) {
				// out of bounds, just return
				return dest
			}
			dest[pos] = param
		case 'D':
			// end is one past the last rune to delete
			end := pos + 1
			pos -= parNum - 1
			// Both ends of the range must be validated: an earlier skip with
			// an out-of-range parameter can leave the cursor beyond the
			// buffer even when the computed start lands back inside it.
			if pos < 0 || pos >= len(dest) || end < 0 || end > len(dest) {
				// out of bounds, just return
				return dest
			}
			dest = append(dest[:pos], dest[end:]...)
		case 'I':
			pos++
			if pos < 0 || pos > len(dest) {
				// out of bounds, just return
				return dest
			}
			// grow by one, shift the tail right, then drop the rune in
			dest = append(dest, 0)
			copy(dest[pos+1:], dest[pos:])
			dest[pos] = param
		}
		pos--
	}
	return dest
}
================================================
FILE: analysis/lang/pl/stempel/diff_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"fmt"
"reflect"
"testing"
)
// TestDiff checks Diff against a table of command sequences. The cursor
// starts on the last rune of the input and moves backwards after each command.
func TestDiff(t *testing.T) {
	cases := []struct {
		in  []rune
		cmd []rune
		out []rune
	}{
		// Delete removes N runes backwards from the cursor. Asking for a
		// non-positive count, or for more runes than are available, yields
		// the buffer as it is at that point.

		// delete 1
		{in: []rune("hello"), cmd: []rune{'D', 'a'}, out: []rune("hell")},
		// delete 2
		{in: []rune("hello"), cmd: []rune{'D', 'a' + 1}, out: []rune("hel")},
		// delete 3
		{in: []rune("hello"), cmd: []rune{'D', 'a' + 2}, out: []rune("he")},
		// delete 4
		{in: []rune("hello"), cmd: []rune{'D', 'a' + 3}, out: []rune("h")},
		// delete 5
		{in: []rune("hello"), cmd: []rune{'D', 'a' + 4}, out: []rune{}},
		// delete 6 (invalid, buffer returned as is)
		{in: []rune("hello"), cmd: []rune{'D', 'a' + 5}, out: []rune("hello")},
		// delete with a parameter one below 'a' (invalid, buffer returned as is)
		{in: []rune("hello"), cmd: []rune{'D', 'a' - 1}, out: []rune("hello")},
		// delete 1, then delete 1 again
		{in: []rune("hello"), cmd: []rune{'D', 'a', 'D', 'a'}, out: []rune("hel")},

		// insert 'p'
		{in: []rune("hello"), cmd: []rune{'I', 'p'}, out: []rune("hellop")},
		// insert 'l', then insert 'e'; the cursor moves backwards, so the
		// runes have to be inserted in reverse
		{in: []rune("h"), cmd: []rune{'I', 'l', 'I', 'e'}, out: []rune("hel")},

		// replace with 'y'
		{in: []rune("hello"), cmd: []rune{'R', 'y'}, out: []rune("helly")},
		// replace with 'y', then with 'x'; the cursor moves backwards
		// between the two replacements
		{in: []rune("hello"), cmd: []rune{'R', 'y', 'R', 'x'}, out: []rune("helxy")},

		// skip 1, then replace with 'y'
		{in: []rune("hello"), cmd: []rune{'-', 'a', 'R', 'y'}, out: []rune("helyo")},
		// skip 2, then replace with 'y'
		{in: []rune("hello"), cmd: []rune{'-', 'a' + 1, 'R', 'y'}, out: []rune("heylo")},
		// skip 5 (too far), then replace with 'y': original comes back
		{in: []rune("hello"), cmd: []rune{'-', 'a' + 4, 'R', 'y'}, out: []rune("hello")},
	}

	for _, tc := range cases {
		name := fmt.Sprintf("%s-'%s'", string(tc.in), string(tc.cmd))
		t.Run(name, func(t *testing.T) {
			got := Diff(tc.in, tc.cmd)
			if reflect.DeepEqual(tc.out, got) {
				return
			}
			t.Errorf("expected %v, got %v", tc.out, got)
		})
	}
}
================================================
FILE: analysis/lang/pl/stempel/file.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"bytes"
_ "embed"
"github.com/blevesearch/stempel/javadata"
"io"
"os"
"strings"
)
// stempelFile holds the contents of pl/stemmer_20000.tbl, embedded into the
// binary at build time; LoadTrie builds its trie from these bytes.
//
//go:embed pl/stemmer_20000.tbl
var stempelFile []byte
// Trie is the external interface to work with the stempel trie
type Trie interface {
	// GetLastOnPath takes a word as runes and returns the rune slice that
	// callers in this package hand to Diff as the diff command for that
	// word.
	GetLastOnPath([]rune) []rune
}
// Open attempts to open a file at the specified path, and use it to
// build a Trie.
//
// The file is only needed while the trie is being built, so it is closed
// before Open returns; previously the handle was never released. An error
// from opening the file or from building the trie is returned unchanged.
func Open(path string) (Trie, error) {
	f, err := os.Open(path)
	if err != nil {
		return nil, err
	}
	// The file was opened read-only, so a Close failure cannot lose data and
	// is deliberately ignored.
	defer func() { _ = f.Close() }()
	return buildTrieFromReader(f)
}
// LoadTrie builds a Trie from the table embedded in stempelFile.
func LoadTrie() (Trie, error) {
	embedded := bytes.NewReader(stempelFile)
	return buildTrieFromReader(embedded)
}
// buildTrieFromReader decodes a trie from f. The stream starts with a UTF
// string naming the method; when that string contains "M" the rest is decoded
// with newMultiTrie, otherwise with newTrie. On any failure the returned Trie
// is nil.
func buildTrieFromReader(f io.Reader) (Trie, error) {
	reader := javadata.NewReader(f)
	method, err := reader.ReadUTF()
	if err != nil {
		return nil, err
	}

	if !strings.Contains(method, "M") {
		single, err := newTrie(reader)
		if err != nil {
			return nil, err
		}
		return single, nil
	}

	multi, err := newMultiTrie(reader)
	if err != nil {
		return nil, err
	}
	return multi, nil
}
================================================
FILE: analysis/lang/pl/stempel/file_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"bufio"
"compress/gzip"
"os"
"strings"
"testing"
"golang.org/x/text/encoding/charmap"
)
// TestEmpty verifies that an empty word yields an empty diff from the trie
// and stays empty after that diff is applied.
func TestEmpty(t *testing.T) {
	trie, err := Open("pl/stemmer_20000.tbl")
	if err != nil {
		t.Fatal(err)
	}

	word := []rune("")
	patch := trie.GetLastOnPath(word)
	if len(patch) > 0 {
		t.Fatalf("expected empty diff, got %v", patch)
	}

	if word = Diff(word, patch); len(word) > 0 {
		t.Fatalf("expected empty buff, got %v", word)
	}
}
// TestStem is a crash test only: it stems every entry of the gzipped
// dictionary and discards the results. Nothing about the stems is asserted.
func TestStem(t *testing.T) {
	trie, err := Open("pl/stemmer_20000.tbl")
	if err != nil {
		t.Fatal(err)
	}

	gzFile, err := os.Open("pl/pl_PL.dic.gz")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := gzFile.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	gzReader, err := gzip.NewReader(gzFile)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := gzReader.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// the dictionary is ISO 8859-2 encoded; decode it while scanning lines
	lines := bufio.NewScanner(charmap.ISO8859_2.NewDecoder().Reader(gzReader))
	for lines.Scan() {
		word := lines.Text()
		// keep only what precedes the first '/', unless the line starts with it
		if slash := strings.Index(word, "/"); slash > 0 {
			word = word[:slash]
		}
		runes := []rune(word)
		_ = Diff(runes, trie.GetLastOnPath(runes))
	}
	if err := lines.Err(); err != nil {
		t.Fatal(err)
	}
}
================================================
FILE: analysis/lang/pl/stempel/fuzz.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build gofuzz
// +build gofuzz
package stempel
// fuzzTrie is the trie queried by Fuzz; init loads it from
// pl/stemmer_20000.tbl.
var fuzzTrie Trie
// init loads the trie used by Fuzz and panics if it cannot be opened.
func init() {
	trie, err := Open("pl/stemmer_20000.tbl")
	if err != nil {
		panic(err)
	}
	fuzzTrie = trie
}
// Fuzz is the fuzzing entry point: it treats data as a word, asks the trie
// for its diff command and applies it, discarding the result. It always
// returns 1.
func Fuzz(data []byte) int {
	word := []rune(string(data))
	patch := fuzzTrie.GetLastOnPath(word)
	_ = Diff(word, patch)
	return 1
}
================================================
FILE: analysis/lang/pl/stempel/javadata/README.md
================================================
# javadata
Go library to read data written with java.io.DataOutput
================================================
FILE: analysis/lang/pl/stempel/javadata/fuzz.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build gofuzz
// +build gofuzz
package javadata
import "bytes"
// Fuzz is the fuzzing entry point for the reader: it decodes UTF strings from
// data until the first failure.
//
// It returns 1 when at least one string was decoded before the stream ended
// or turned invalid, and 0 otherwise. The previous version looped until
// ReadUTF failed and then tested that same error, so it returned 0 for every
// input and its "return 1" could never run.
func Fuzz(data []byte) int {
	jdr := NewReader(bytes.NewReader(data))

	decoded := 0
	for {
		if _, err := jdr.ReadUTF(); err != nil {
			// end of input or malformed data: nothing more to decode
			break
		}
		decoded++
	}

	if decoded == 0 {
		return 0
	}
	return 1
}
================================================
FILE: analysis/lang/pl/stempel/javadata/input.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package javadata
import (
"bufio"
"encoding/binary"
"fmt"
"io"
)
// ErrMalformedInput is returned by ReadUTF when the encoded string bytes do
// not form a valid sequence (bad lead byte, bad continuation byte, or a
// multi-byte sequence running past the declared length).
var ErrMalformedInput = fmt.Errorf("malformed input")
// Reader knows how to read java serialized data
type Reader struct {
	// r is the buffered source that every Read* method consumes from.
	r *bufio.Reader
}
// NewReader wraps r in a buffered reader and returns a Reader that decodes
// java data input from it.
func NewReader(r io.Reader) *Reader {
	buffered := bufio.NewReader(r)
	return &Reader{r: buffered}
}
// ReadBool reads a single byte from the stream and reports whether it is
// non-zero. If the byte cannot be read, false and the read error are returned.
func (r *Reader) ReadBool() (bool, error) {
	raw, err := r.r.ReadByte()
	if err == nil {
		return raw != 0, nil
	}
	return false, err
}
// ReadInt32 reads a big-endian signed 32-bit integer from the stream.
func (r *Reader) ReadInt32() (rv int32, err error) {
	var value int32
	readErr := binary.Read(r.r, binary.BigEndian, &value)
	return value, readErr
}
// ReadUint16 reads a big-endian unsigned 16-bit integer from the stream.
func (r *Reader) ReadUint16() (rv uint16, err error) {
	var value uint16
	readErr := binary.Read(r.r, binary.BigEndian, &value)
	return value, readErr
}
// ReadCharAsRune reads a java char, stored as a big-endian 16-bit value, and
// returns it widened to a rune.
func (r *Reader) ReadCharAsRune() (rv rune, err error) {
	var unit uint16
	readErr := binary.Read(r.r, binary.BigEndian, &unit)
	return rune(unit), readErr
}
// ReadUTF attempts to read a UTF-encoded string from the stream.
// This method follows the specific alternate encoding described here:
// https://docs.oracle.com/javase/7/docs/api/java/io/DataInput.html
//
// The string is stored as an unsigned 16-bit big-endian byte count followed
// by that many encoded bytes. Each character takes one byte (0xxx xxxx), two
// bytes (110x xxxx 10xx xxxx) or three bytes (1110 xxxx 10xx xxxx 10xx xxxx).
//
// ErrMalformedInput is returned when a lead byte has the form 10xx xxxx or
// 1111 xxxx, when a continuation byte is not of the form 10xx xxxx, or when a
// multi-byte sequence extends past the declared length. Errors from reading
// the length or the bytes are returned unchanged.
func (r *Reader) ReadUTF() (string, error) {
	utfLen, err := r.ReadUint16()
	if err != nil {
		return "", err
	}
	encoded := make([]byte, utfLen)
	if _, err = io.ReadFull(r.r, encoded); err != nil {
		return "", err
	}

	// Every character uses at least one encoded byte, so the byte count is
	// an upper bound for the number of runes.
	runes := make([]rune, 0, len(encoded))

	// The cursor is an int on purpose: a uint16 cursor wraps around when a
	// multi-byte lead sits at the very end of a maximum-length string, which
	// defeats the length checks below.
	for pos := 0; pos < len(encoded); {
		c := encoded[pos]
		switch c >> 4 {
		case 0, 1, 2, 3, 4, 5, 6, 7:
			/* 0xxxxxxx*/
			pos++
			runes = append(runes, rune(c))
		case 12, 13:
			/* 110x xxxx 10xx xxxx*/
			pos += 2
			if pos > len(encoded) {
				return "", ErrMalformedInput
			}
			char2 := rune(encoded[pos-1])
			if (char2 & 0xC0) != 0x80 {
				return "", ErrMalformedInput
			}
			runes = append(runes, (rune(c)&0x1F)<<6|char2&0x3F)
		case 14:
			/* 1110 xxxx 10xx xxxx 10xx xxxx */
			pos += 3
			if pos > len(encoded) {
				return "", ErrMalformedInput
			}
			char2 := rune(encoded[pos-2])
			char3 := rune(encoded[pos-1])
			if ((char2 & 0xC0) != 0x80) || ((char3 & 0xC0) != 0x80) {
				return "", ErrMalformedInput
			}
			runes = append(runes, (rune(c)&0x0F)<<12|(char2&0x3F)<<6|(char3&0x3F)<<0)
		default:
			/* 10xx xxxx, 1111 xxxx */
			// A continuation byte (high nibble 8 through 11) can never
			// start a character, and four-byte forms are not part of this
			// encoding.
			return "", ErrMalformedInput
		}
	}
	return string(runes), nil
}
================================================
FILE: analysis/lang/pl/stempel/javadata/input_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package javadata
import (
"bytes"
"io"
"testing"
)
// TestReadBool checks that ReadBool decodes a zero byte as false, non-zero
// bytes (1 and 27 here) as true, and reports io.EOF on empty input.
func TestReadBool(t *testing.T) {
	tests := []struct {
		in  []byte
		out bool
		err error
	}{
		{
			in:  []byte{0},
			out: false,
		},
		{
			in:  []byte{1},
			out: true,
		},
		{
			in:  []byte{27},
			out: true,
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
	}

	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			sr := bytes.NewReader(test.in)
			dr := NewReader(sr)
			actual, err := dr.ReadBool()
			if err != test.err {
				// report both sides; the actual error alone is often just <nil>
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %t, got %t", test.out, actual)
			}
		})
	}
}
// TestReadUint16 checks big-endian decoding of two-byte values and that
// empty input reports io.EOF.
func TestReadUint16(t *testing.T) {
	tests := []struct {
		in  []byte
		out uint16
		err error
	}{
		{
			in:  []byte{0, 0},
			out: 0,
		},
		{
			in:  []byte{0, 1},
			out: 1,
		},
		{
			in:  []byte{1, 0},
			out: 256,
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
	}

	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			sr := bytes.NewReader(test.in)
			dr := NewReader(sr)
			actual, err := dr.ReadUint16()
			if err != test.err {
				// report both sides; the actual error alone is often just <nil>
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %d, got %d", test.out, actual)
			}
		})
	}
}
// TestReadInt32 checks big-endian decoding of four-byte values and that
// empty input reports io.EOF.
func TestReadInt32(t *testing.T) {
	tests := []struct {
		in  []byte
		out int32
		err error
	}{
		{
			in:  []byte{0, 0, 0, 0},
			out: 0,
		},
		{
			in:  []byte{0, 0, 0, 1},
			out: 1,
		},
		{
			in:  []byte{0, 0, 1, 0},
			out: 256,
		},
		{
			in:  []byte{0, 1, 0, 0},
			out: 65536,
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
	}

	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			sr := bytes.NewReader(test.in)
			dr := NewReader(sr)
			actual, err := dr.ReadInt32()
			if err != test.err {
				// report both sides; the actual error alone is often just <nil>
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %d, got %d", test.out, actual)
			}
		})
	}
}
// TestReadUTF exercises ReadUTF with one-, two- and three-byte characters,
// with input that ends before the declared length, and with malformed
// sequences (truncated multi-byte characters, bad continuation bytes and an
// invalid lead byte).
func TestReadUTF(t *testing.T) {
	tests := []struct {
		in  []byte
		out string
		err error
	}{
		{
			in:  []byte{0, 3, 'c', 'a', 't'},
			out: "cat",
		},
		{
			in:  []byte{0, 2, 0xc2, 0xa3},
			out: "£",
		},
		{
			in:  []byte{0, 3, 0xe3, 0x85, 0x85},
			out: "ㅅ",
		},
		{
			in:  []byte{0, 6, 0xe3, 0x85, 0x85, 'c', 'a', 't'},
			out: "ㅅcat",
		},
		{
			in:  []byte{},
			err: io.EOF,
		},
		{
			in:  []byte{0, 3},
			err: io.EOF,
		},
		{
			in:  []byte{0, 1, 0xc2},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 2, 0xc2, 0xc3},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 2, 0xe3, 0x85},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 3, 0xe3, 0xc5, 0x85},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0, 1, 0xff},
			err: ErrMalformedInput,
		},
		{
			in:  []byte{0x0, 0x05, 0x44, 0x61, 0x52, 0xc4, 0x87},
			out: "DaRć",
		},
	}

	for _, test := range tests {
		t.Run(string(test.in), func(t *testing.T) {
			sr := bytes.NewReader(test.in)
			dr := NewReader(sr)
			actual, err := dr.ReadUTF()
			if err != test.err {
				// report both sides; the actual error alone is often just <nil>
				t.Errorf("expected error %v, got %v", test.err, err)
			}
			if actual != test.out {
				t.Errorf("expected %s, got %s", test.out, actual)
			}
		})
	}
}
// func TestFile(t *testing.T) {
// f, err := os.Open("stemmer_20000.tbl")
// if err != nil {
// t.Fatal(err)
// }
// r := NewReader(f)
// reversed, err := r.ReadBool()
// if err != nil {
// t.Fatal(err)
// }
// log.Printf("reversed: %t", reversed)
// root, err := r.ReadInt32()
// if err != nil {
// t.Fatal(err)
// }
// log.Printf("root: %d", root)
// n, err := r.ReadInt32()
// if err != nil {
// t.Fatal(err)
// }
// log.Printf("n is %d", n)
// // for n > 0 {
// // utf, err := r.ReadUTF()
// // if err != nil {
// // t.Error(err)
// // }
// // log.Printf("read: %s", utf)
// // n--
// // }
// }
================================================
FILE: analysis/lang/pl/stempel/multi_trie.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"fmt"
"github.com/blevesearch/stempel/javadata"
)
// multiTrie represents a trie of tries. When using the multiTrie, each trie
// is consulted consecutively to find commands to perform on the input. Thus
// a multiTrie with seven tries might have up to seven groups of commands to
// perform on the input.
type multiTrie struct {
// tries holds the component tries in the order they are consulted.
tries []*trie
// by is read from the serialized table by newMultiTrie; nothing else in
// this file uses it.
by int32
// forward selects which end of the key skip trims: the front when true,
// the back when false.
forward bool
}
// newMultiTrie deserializes a multiTrie from r. The layout read is: the
// forward flag, the "by" value, the number of tries, and then each trie in
// turn. Any read error aborts the load and is returned as-is.
func newMultiTrie(r *javadata.Reader) (rv *multiTrie, err error) {
	rv = &multiTrie{}

	if rv.forward, err = r.ReadBool(); err != nil {
		return nil, err
	}
	if rv.by, err = r.ReadInt32(); err != nil {
		return nil, err
	}

	remaining, err := r.ReadInt32()
	if err != nil {
		return nil, err
	}
	for ; remaining > 0; remaining-- {
		sub, subErr := newTrie(r)
		if subErr != nil {
			return nil, subErr
		}
		rv.tries = append(rv.tries, sub)
	}
	return rv, nil
}
// eom is the marker rune: when a trie's result consists of this single rune,
// GetLastOnPath stops consulting further tries.
const eom = rune('*')

// GetLastOnPath consults each trie in order and returns the concatenation of
// the commands they produce for key.
//
// The walk stops early, returning what has been collected so far, when a trie
// yields nothing, yields only eom, yields a command that cannot follow the
// previous one (see cannotFollow), yields a result too short to contain an
// opcode/argument pair, or when trimming the key fails.
func (t *multiTrie) GetLastOnPath(key []rune) []rune {
	var rv []rune
	lastKey := key
	p := make([][]rune, len(t.tries))
	lastR := ' '
	for i := 0; i < len(t.tries); i++ {
		r := t.tries[i].GetLastOnPath(lastKey)
		if len(r) == 0 || (len(r) == 1 && r[0] == eom) {
			return rv
		}
		if cannotFollow(lastR, r[0]) {
			return rv
		}
		// A lone rune other than eom has no second-to-last element; stop
		// here rather than index out of range.
		if len(r) < 2 {
			return rv
		}
		lastR = r[len(r)-2]
		p[i] = r
		if p[i][0] == '-' {
			if i > 0 {
				var err error
				key, err = t.skip(key, lengthPP(p[i-1]))
				if err != nil {
					return rv
				}
			}
			var err error
			key, err = t.skip(key, lengthPP(p[i]))
			if err != nil {
				return rv
			}
		}
		rv = append(rv, r...)
		// keep consulting with the previous key once the trimmed one is empty
		if len(key) != 0 {
			lastKey = key
		}
	}
	return rv
}
// cannotFollow reports whether the opcode goes is not allowed directly after
// the opcode after. Only a repeated '-' or a repeated 'D' is rejected.
func cannotFollow(after, goes rune) bool {
	if after != '-' && after != 'D' {
		return false
	}
	return goes == after
}
// errIndexOutOfBounds is returned by skip when the requested count cannot be
// removed from the input.
var errIndexOutOfBounds = fmt.Errorf("index out of bounds")

// skip removes count runes from in: from the front when the multiTrie is
// forward, otherwise from the back. The result shares in's backing array.
// It returns errIndexOutOfBounds when count is negative or larger than
// len(in).
func (t *multiTrie) skip(in []rune, count int) ([]rune, error) {
	// lengthPP can yield a negative count for a malformed command; reject it
	// here instead of panicking on the slice expression.
	if count < 0 || count > len(in) {
		return nil, errIndexOutOfBounds
	}
	if t.forward {
		return in[count:], nil
	}
	return in[0 : len(in)-count], nil
}
// lengthPP returns the number of input runes a patch command consumes.
//
// A command is a sequence of (opcode, argument) pairs:
//   - '-' and 'D' consume argument-'a'+1 runes,
//   - 'R' consumes one rune,
//   - 'I' and any other opcode consume none.
//
// The argument of every pair is skipped, so an argument that happens to be
// '-', 'D' or 'R' is never mistaken for an opcode. A trailing '-' or 'D' with
// no argument ends the scan and the length accumulated so far is returned.
func lengthPP(cmd []rune) int {
	rv := 0
	for i := 0; i < len(cmd); i += 2 {
		switch cmd[i] {
		case '-', 'D':
			if i+1 >= len(cmd) {
				// truncated pair: nothing left to read the count from
				return rv
			}
			rv += int(cmd[i+1]-rune('a')) + 1
		case 'R':
			rv++
		}
	}
	return rv
}
// String renders every component trie, each prefixed with its index and
// followed by a separator line.
func (t *multiTrie) String() string {
	var out string
	for idx := 0; idx < len(t.tries); idx++ {
		section := fmt.Sprintf("trie %d\n\n %v\n--------\n", idx, t.tries[idx])
		out = out + section
	}
	return out
}
================================================
FILE: analysis/lang/pl/stempel/row.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"fmt"
"github.com/blevesearch/stempel/javadata"
)
// row is one row of a trie's table: it maps a rune to the cell stored for
// that rune.
type row struct {
// cells is keyed by the rune read ahead of each cell in newRow.
cells map[rune]*cell
}
// String renders each cell as "[rune:cell]" on its own line. The lines follow
// map iteration order, so their order is not stable between calls.
func (r *row) String() string {
	var out string
	for key, entry := range r.cells {
		line := fmt.Sprintf("[%s:%v]\n", string(key), entry)
		out = out + line
	}
	return out
}
// newRow deserializes a row from r: a 32-bit cell count followed by that many
// (char, cell) pairs. A later pair with the same char replaces the earlier
// one. Read errors are wrapped with context and remain reachable through
// errors.Is / errors.As.
func newRow(r *javadata.Reader) (*row, error) {
	rv := &row{
		cells: make(map[rune]*cell),
	}
	nCells, err := r.ReadInt32()
	if err != nil {
		return nil, fmt.Errorf("error reading num cells: %w", err)
	}
	for ; nCells > 0; nCells-- {
		key, err := r.ReadCharAsRune()
		if err != nil {
			return nil, fmt.Errorf("error reading cell char: %w", err)
		}
		// named entry rather than cell so the type is not shadowed
		entry, err := newCell(r)
		if err != nil {
			return nil, fmt.Errorf("error reading cell: %w", err)
		}
		rv.cells[key] = entry
	}
	return rv, nil
}
// getCmd returns the cmd value of the cell stored for way, or -1 when the row
// has no cell for that rune.
func (r *row) getCmd(way rune) int32 {
	if found := r.at(way); found != nil {
		return found.cmd
	}
	return -1
}
// getRef returns the ref value of the cell stored for way, or -1 when the row
// has no cell for that rune.
func (r *row) getRef(way rune) int32 {
	if found := r.at(way); found != nil {
		return found.ref
	}
	return -1
}
// at looks up the cell stored for rune c; the result is nil when there is
// none.
func (r *row) at(c rune) *cell {
	found := r.cells[c]
	return found
}
================================================
FILE: analysis/lang/pl/stempel/strenum.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"io"
)
// strEnum walks a rune slice one element at a time, in either direction.
type strEnum struct {
	r    []rune // runes being enumerated
	from int    // index of the next rune to hand out
	by   int    // step applied after each rune: +1 or -1
}

// newStrEnum returns an enumerator over s that runs front-to-back when up is
// true and back-to-front otherwise.
func newStrEnum(s []rune, up bool) *strEnum {
	if up {
		return &strEnum{r: s, from: 0, by: 1}
	}
	return &strEnum{r: s, from: len(s) - 1, by: -1}
}

// next returns the rune at the current position and advances the enumerator.
// Once the position has left the slice it returns io.EOF.
func (s *strEnum) next() (rune, error) {
	pos := s.from
	if pos >= 0 && pos < len(s.r) {
		s.from = pos + s.by
		return s.r[pos], nil
	}
	return 0, io.EOF
}
================================================
FILE: analysis/lang/pl/stempel/strenum_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"fmt"
"io"
"reflect"
"testing"
)
// TestStrenumNext drains a strEnum in both directions and checks that the
// runes come out in the expected order and that the walk ends with io.EOF.
func TestStrenumNext(t *testing.T) {
	word := []rune{'h', 'e', 'l', 'l', 'o'}
	tests := []struct {
		in     []rune
		up     bool
		expect []rune
	}{
		{in: word, up: true, expect: []rune{'h', 'e', 'l', 'l', 'o'}},
		{in: word, up: false, expect: []rune{'o', 'l', 'l', 'e', 'h'}},
	}

	for _, test := range tests {
		t.Run(fmt.Sprintf("%s-up-%t", string(test.in), test.up), func(t *testing.T) {
			it := newStrEnum(test.in, test.up)
			var got []rune
			for {
				r, err := it.next()
				if err != nil {
					if err != io.EOF {
						t.Errorf("next got err: %v", err)
					}
					break
				}
				got = append(got, r)
			}
			if !reflect.DeepEqual(got, test.expect) {
				t.Errorf("expected %v, got %v", test.expect, got)
			}
		})
	}
}
================================================
FILE: analysis/lang/pl/stempel/trie.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stempel
import (
"fmt"
"github.com/blevesearch/stempel/javadata"
)
// trie represents the internal trie structure
type trie struct {
// rows is the transition table; cell refs and root index into it.
rows []*row
// cmds holds the command strings that cell cmd values index into.
cmds []string
// root is the index of the row where every lookup starts.
root int32
// forward is passed to newStrEnum: the key is walked front-to-back when
// true and back-to-front when false.
forward bool
}
// newTrie deserializes a trie from r. The layout read is: the forward flag,
// the root row index, a command count followed by that many UTF strings, and
// a row count followed by that many rows. Read errors are wrapped with
// context and remain reachable through errors.Is / errors.As.
func newTrie(r *javadata.Reader) (rv *trie, err error) {
	rv = &trie{}
	rv.forward, err = r.ReadBool()
	if err != nil {
		return nil, fmt.Errorf("error reading trie forward: %w", err)
	}
	rv.root, err = r.ReadInt32()
	if err != nil {
		return nil, fmt.Errorf("error reading trie root: %w", err)
	}

	// commands
	nCommands, err := r.ReadInt32()
	if err != nil {
		return nil, fmt.Errorf("error reading trie num commands: %w", err)
	}
	for ; nCommands > 0; nCommands-- {
		utfCommand, nerr := r.ReadUTF()
		if nerr != nil {
			return nil, fmt.Errorf("error reading trie command utf: %w", nerr)
		}
		rv.cmds = append(rv.cmds, utfCommand)
	}

	// rows
	nRows, err := r.ReadInt32()
	if err != nil {
		return nil, fmt.Errorf("error reading trie num rows: %w", err)
	}
	for ; nRows > 0; nRows-- {
		// named next rather than row so the type is not shadowed
		next, rerr := newRow(r)
		if rerr != nil {
			return nil, fmt.Errorf("error reading trie row: %w", rerr)
		}
		rv.rows = append(rv.rows, next)
	}

	return rv, nil
}
// getRow returns the row at index i, or nil when i falls outside the table.
func (t *trie) getRow(i int) *row {
	if i >= 0 && i < len(t.rows) {
		return t.rows[i]
	}
	return nil
}
// GetLastOnPath walks key through the trie, in the direction given by
// t.forward, and returns the last command found along the path.
//
// For every rune except the final one: if the current row has a command for
// the rune it is remembered, and if the row has a reference for the rune the
// walk moves to that row; otherwise the remembered command is returned. For
// the final rune, the current row's command is returned if there is one,
// otherwise the remembered command. The result is nil when no command was
// found.
//
// A root or reference that points outside the row table, or a command index
// outside the command list, is treated as "nothing here" rather than causing
// a panic.
func (t *trie) GetLastOnPath(key []rune) []rune {
	var last []rune
	now := t.getRow(int(t.root))
	e := newStrEnum(key, t.forward)

	for i := 0; i < len(key)-1; i++ {
		if now == nil {
			return last
		}
		r, err := e.next()
		if err != nil {
			return last
		}
		if cmd, ok := t.command(now.getCmd(r)); ok {
			last = cmd
		}
		ref := now.getRef(r)
		if ref < 0 {
			return last
		}
		now = t.getRow(int(ref))
	}

	if now == nil {
		return last
	}
	r, err := e.next()
	if err != nil {
		return last
	}
	if cmd, ok := t.command(now.getCmd(r)); ok {
		return cmd
	}
	return last
}

// command returns the command at index w as runes; ok is false when w is
// negative or beyond the end of the command list.
func (t *trie) command(w int32) (cmd []rune, ok bool) {
	if w < 0 || int(w) >= len(t.cmds) {
		return nil, false
	}
	return []rune(t.cmds[w]), true
}
// String lists every command followed by every row, one per line.
func (t *trie) String() string {
	var out string
	for i := range t.cmds {
		out = out + fmt.Sprintf("cmd: %s\n", t.cmds[i])
	}
	for i := range t.rows {
		out = out + fmt.Sprintf("row: %v\n", t.rows[i])
	}
	return out
}
================================================
FILE: analysis/lang/pl/stop_filter_pl.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds a stop-token filter from the token map
// registered under StopName. It fails if that token map cannot be obtained
// from the cache. The config argument is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers the stop-token filter constructor under StopName and panics
// if registration fails.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pl/stop_words_pl.go
================================================
package pl
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which both the Polish stop-word token map and
// the Polish stop-token filter are registered.
const StopName = "stop_pl"
// this content was obtained from the stopwords-iso project; see the notice
// embedded at the top of the list below for the source URL and license.
// ` was changed to ' to allow for literal string
var PolishStopWords = []byte(` | From https://github.com/stopwords-iso/stopwords-pl/tree/master
| The MIT License (MIT)
| See https://github.com/stopwords-iso/stopwords-pl/blob/master/LICENSE
| - Encoding was converted to UTF-8.
| - This notice was added.
| - english text is auto-translate
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| a polish stop word list. comments begin with vertical bar. each stop
| word is at the start of a line.
a | and
aby | to
ach | ah
acz | although
aczkolwiek | although
aj | ay
albo | or
ale | but
ależ | but
ani | or
aż | until
bardziej | more
bardzo | very
bez | without
bo | because
bowiem | because
by | by
byli | were
bym | i would
bynajmniej | not at all
być | to be
był | was
była | was
było | was
były | were
będzie | will be
będą | they will
cali | inches
cała | whole
cały | whole
chce | i want
choć | though
ci | you
ciebie | you
cię | you
co | what
cokolwiek | whatever
coraz | getting
coś | something
czasami | sometimes
czasem | sometimes
czemu | why
czy | whether
czyli | that is
często | often
daleko | far
dla | for
dlaczego | why
dlatego | which is why
do | down
dobrze | all right
dokąd | where
dość | enough
dr | dr
dużo | a lot
dwa | two
dwaj | two
dwie | two
dwoje | two
dzisiaj | today
dziś | today
gdy | when
gdyby | if
gdyż | because
gdzie | where
gdziekolwiek | wherever
gdzieś | somewhere
go | him
godz | time
hab | hab
i | and
ich | their
ii | ii
iii | iii
ile | how much
im | them
inna | different
inne | other
inny | other
innych | other
inż | eng
iv | iv
ix | ix
iż | that
ja | i
jak | how
jakaś | some
jakby | as if
jaki | what
jakichś | some
jakie | what
jakiś | some
jakiż | what
jakkolwiek | however
jako | as
jakoś | somehow
je | them
jeden | one
jedna | one
jednak | but
jednakże | however
jedno | one
jednym | one
jedynie | only
jego | his
jej | her
jemu | him
jest | is
jestem | i am
jeszcze | still
jeśli | if
jeżeli | if
już | already
ją | i
każdy | everyone
kiedy | when
kierunku | direction
kilka | several
kilku | several
kimś | someone
kto | who
ktokolwiek | anyone
ktoś | someone
która | which
które | which
którego | whose
której | which
który | which
których | which
którym | which
którzy | who
ku | to
lat | years
lecz | but
lub | or
ma | has
mają | may
mam | i have
mamy | we have
mało | little
mgr | msc
mi | to me
miał | had
mimo | despite
między | between
mnie | me
mną | me
mogą | they can
moi | my
moim | my
moja | my
moje | my
może | maybe
możliwe | that's possible
można | you can
mu | him
musi | has to
my | we
mój | my
na | on
nad | above
nam | u.s
nami | us
nas | us
nasi | our
nasz | our
nasza | our
nasze | our
naszego | our
naszych | ours
natomiast | whereas
natychmiast | immediately
nawet | even
nic | nothing
nich | them
nie | no
niech | let
niego | him
niej | her
niemu | not him
nigdy | never
nim | him
nimi | them
nią | her
niż | than
no | yeah
nowe | new
np | e.g.
nr | no
o | about
o.o. | o.o.
obok | near
od | from
ok | approx
około | about
on | he
ona | she
one | they
oni | they
ono | it
oraz | and
oto | here
owszem | yes
pan | mr
pana | mr
pani | you
pl | pl
po | after
pod | under
podczas | while
pomimo | despite
ponad | above
ponieważ | because
powinien | should
powinna | she should
powinni | they should
powinno | should
poza | apart from
prawie | almost
prof | prof
przecież | yet
przed | before
przede | above
przedtem | before
przez | by
przy | by
raz | once
razie | case
roku | year
również | also
sam | alone
sama | alone
się | myself
skąd | from where
sobie | myself
sobą | myself
sposób | way
swoje | own
są | are
ta | this
tak | yes
taka | such
taki | such
takich | such
takie | such
także | too
tam | over there
te | these
tego | this
tej | this one
tel | phone
temu | ago
ten | this
teraz | now
też | too
to | this
tobie | you
tobą | you
toteż | this as well
totobą | you
trzeba | it's necessary to
tu | here
tutaj | here
twoi | yours
twoim | yours
twoja | your
twoje | your
twym | your
twój | your
ty | you
tych | these
tylko | just
tym | this
tys | thousand
tzw | so-called
tę | these
u | at
ul | st
vi | vi
vii | vii
viii | viii
vol | vol
w | in
wam | you
wami | you
was | mustache
wasi | yours
wasz | yours
wasza | yours
wasze | yours
we | in
według | according to
wie | knows
wiele | many
wielu | many
więc | so
więcej | more
wszyscy | all
wszystkich | everyone
wszystkie | all
wszystkim | everyone
wszystko | all
wtedy | then
www | www
wy | you
właśnie | exactly
wśród | among
xi | x.x
xii | xii
xiii | xii
xiv | xiv
xv | xv
z | with
za | behind
zapewne | probably
zawsze | always
zaś | and
ze | that
zeznowu | testify
znowu | again
znów | again
został | left
zł | zloty
żaden | no
żadna | none
żadne | none
żadnych | none
że | that
żeby | to
`)
// TokenMapConstructor creates a token map and loads PolishStopWords into it.
// The map is returned together with whatever error the load reported. The
// config and cache arguments are not consulted.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	words := analysis.NewTokenMap()
	loadErr := words.LoadBytes(PolishStopWords)
	return words, loadErr
}
// init registers the Polish stop-word token map under StopName and panics if
// registration fails.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pt/analyzer_pt.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pt
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the Portuguese analyzer is registered.
const AnalyzerName = "pt"

// AnalyzerConstructor assembles the Portuguese analyzer: the unicode
// tokenizer followed by the lowercase, Portuguese stop-word and Portuguese
// light-stemmer filters, in that order. It fails if any component cannot be
// obtained from the cache. The config argument is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// filters are looked up, and later applied, in this order
	filterNames := []string{lowercase.Name, StopName, LightStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers the Portuguese analyzer under AnalyzerName and panics if
// registration fails.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pt/analyzer_pt_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pt
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestPortugueseAnalyzer runs the registered Portuguese analyzer over a few
// inputs and compares the resulting terms: two inflections that should stem
// to the same term, and a stop word that should produce no tokens.
func TestPortugueseAnalyzer(t *testing.T) {
	term := func(s string) analysis.TokenStream {
		return analysis.TokenStream{&analysis.Token{Term: []byte(s)}}
	}
	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("quilométricas"), output: term("quilometric")},
		{input: []byte("quilométricos"), output: term("quilometric")},
		// stop word
		{input: []byte("não"), output: analysis.TokenStream{}},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, test := range tests {
		got := analyzer.Analyze(test.input)
		if len(got) != len(test.output) {
			t.Fatalf("expected length: %d, got %d", len(test.output), len(got))
		}
		for i := range got {
			want := test.output[i].Term
			if reflect.DeepEqual(got[i].Term, want) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", want, want, got[i].Term, got[i].Term)
		}
	}
}
================================================
FILE: analysis/lang/pt/light_stemmer_pt.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pt
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// LightStemmerName is the name under which the Portuguese light stemmer
// token filter is registered.
const LightStemmerName = "stemmer_pt_light"
// PortugueseLightStemmerFilter is a token filter that rewrites each token's
// term with the Portuguese light stemming rules in this file. It carries no
// state.
type PortugueseLightStemmerFilter struct {
}
// NewPortugueseLightStemmerFilter returns a new, stateless Portuguese light
// stemmer filter.
func NewPortugueseLightStemmerFilter() *PortugueseLightStemmerFilter {
	return new(PortugueseLightStemmerFilter)
}
// Filter stems the term of every token in input, modifying the tokens in
// place, and returns the same stream.
func (s *PortugueseLightStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		stemmed := stem(bytes.Runes(tok.Term))
		tok.Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// stem applies the light stemming steps to input and returns the result,
// which reuses (and may have overwritten parts of) input's backing array.
//
// Words shorter than four runes are returned untouched. Otherwise a plural or
// adverb suffix is removed, a word still ending in 'a' has its feminine
// ending normalized, a final 'e', 'a' or 'o' is dropped from words longer
// than four runes, and finally accented vowels and 'ç' are replaced by their
// plain letters.
func stem(input []rune) []rune {
	if len(input) < 4 {
		return input
	}

	input = removeSuffix(input)

	if n := len(input); n > 3 && input[n-1] == 'a' {
		input = normFeminine(input)
	}

	if n := len(input); n > 4 {
		if last := input[n-1]; last == 'e' || last == 'a' || last == 'o' {
			input = input[:n-1]
		}
	}

	for i, r := range input {
		switch r {
		case 'à', 'á', 'â', 'ä', 'ã':
			input[i] = 'a'
		case 'ò', 'ó', 'ô', 'ö', 'õ':
			input[i] = 'o'
		case 'è', 'é', 'ê', 'ë':
			input[i] = 'e'
		case 'ù', 'ú', 'û', 'ü':
			input[i] = 'u'
		case 'ì', 'í', 'î', 'ï':
			input[i] = 'i'
		case 'ç':
			input[i] = 'c'
		}
	}
	return input
}
// removeSuffix strips or rewrites a plural / adverb ending. The rules are
// tried top to bottom and the first one that applies wins; each carries its
// own minimum word length. The result reuses input's backing array, and some
// rules overwrite runes in it.
func removeSuffix(input []rune) []rune {
	n := len(input)
	ends := func(suffix string) bool {
		return analysis.RunesEndsWith(input, suffix)
	}
	// consonants after which a trailing "es" is dropped outright
	dropsEs := func(r rune) bool {
		return r == 'r' || r == 's' || r == 'l' || r == 'z'
	}

	switch {
	case n > 4 && ends("es") && dropsEs(input[n-3]):
		return input[:n-2]

	case n > 3 && ends("ns"):
		// -ns -> -m
		input[n-2] = 'm'
		return input[:n-1]

	case n > 4 && (ends("eis") || ends("éis")):
		// -eis / -éis -> -el
		input[n-3] = 'e'
		input[n-2] = 'l'
		return input[:n-1]

	case n > 4 && ends("ais"):
		// -ais -> -al
		input[n-2] = 'l'
		return input[:n-1]

	case n > 4 && ends("óis"):
		// -óis -> -ol
		input[n-3] = 'o'
		input[n-2] = 'l'
		return input[:n-1]

	case n > 4 && ends("is"):
		// -is -> -il
		input[n-1] = 'l'
		return input

	case n > 3 && (ends("ões") || ends("ães")):
		// -ões / -ães -> -ão
		input[n-3] = 'ã'
		input[n-2] = 'o'
		return input[:n-1]

	case n > 6 && ends("mente"):
		return input[:n-5]

	case n > 3 && input[n-1] == 's':
		return input[:n-1]
	}
	return input
}
// normFeminine rewrites a feminine ending to its masculine counterpart. The
// four-letter endings need a word longer than seven runes; all other rules
// need one longer than six. The first rule that applies wins. The result
// reuses input's backing array, and some rules overwrite runes in it.
func normFeminine(input []rune) []rune {
	n := len(input)
	ends := func(suffix string) bool {
		return analysis.RunesEndsWith(input, suffix)
	}

	switch {
	case n > 7 && (ends("inha") || ends("iaca") || ends("eira")):
		input[n-1] = 'o'
		return input

	case n <= 6:
		// too short for any of the remaining rules
		return input

	case ends("osa") || ends("ica") || ends("ida") ||
		ends("ada") || ends("iva") || ends("ama"):
		input[n-1] = 'o'
		return input

	case ends("ona"):
		// -ona -> -ão
		input[n-3] = 'ã'
		input[n-2] = 'o'
		return input[:n-1]

	case ends("ora"):
		return input[:n-1]

	case ends("esa"):
		// -esa -> -ês
		input[n-3] = 'ê'
		return input[:n-1]

	case ends("na"):
		input[n-1] = 'o'
		return input
	}
	return input
}
// PortugueseLightStemmerFilterConstructor returns a new Portuguese light
// stemmer filter. It never fails; the config and cache arguments are not
// consulted.
func PortugueseLightStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewPortugueseLightStemmerFilter()
	return filter, nil
}
// init registers the Portuguese light stemmer filter under LightStemmerName
// and panics if registration fails.
func init() {
	if err := registry.RegisterTokenFilter(LightStemmerName, PortugueseLightStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pt/light_stemmer_pt_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pt
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
func TestPortugueseLightStemmer(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("doutores"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("doutor"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("doutor"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("doutor"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("homens"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("homem"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("homem"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("homem"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("papéis"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("papel"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("papel"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("papel"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("normais"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("normal"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("normal"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("normal"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("lencóis"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("lencol"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("lencol"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("lencol"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("barris"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("barril"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("barril"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("barril"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("botões"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("bota"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("botão"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("bota"),
},
},
},
// longer
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("o"),
},
&analysis.Token{
Term: []byte("debate"),
},
&analysis.Token{
Term: []byte("político"),
},
&analysis.Token{
Term: []byte("pelo"),
},
&analysis.Token{
Term: []byte("menos"),
},
&analysis.Token{
Term: []byte("o"),
},
&analysis.Token{
Term: []byte("que"),
},
&analysis.Token{
Term: []byte("vem"),
},
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("público"),
},
&analysis.Token{
Term: []byte("parece"),
},
&analysis.Token{
Term: []byte("de"),
},
&analysis.Token{
Term: []byte("modo"),
},
&analysis.Token{
Term: []byte("nada"),
},
&analysis.Token{
Term: []byte("surpreendente"),
},
&analysis.Token{
Term: []byte("restrito"),
},
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("temas"),
},
&analysis.Token{
Term: []byte("menores"),
},
&analysis.Token{
Term: []byte("mas"),
},
&analysis.Token{
Term: []byte("há"),
},
&analysis.Token{
Term: []byte("evidentemente"),
},
&analysis.Token{
Term: []byte("grandes"),
},
&analysis.Token{
Term: []byte("questões"),
},
&analysis.Token{
Term: []byte("em"),
},
&analysis.Token{
Term: []byte("jogo"),
},
&analysis.Token{
Term: []byte("nas"),
},
&analysis.Token{
Term: []byte("eleições"),
},
&analysis.Token{
Term: []byte("que"),
},
&analysis.Token{
Term: []byte("se"),
},
&analysis.Token{
Term: []byte("aproximam"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("o"),
},
&analysis.Token{
Term: []byte("debat"),
},
&analysis.Token{
Term: []byte("politic"),
},
&analysis.Token{
Term: []byte("pelo"),
},
&analysis.Token{
Term: []byte("meno"),
},
&analysis.Token{
Term: []byte("o"),
},
&analysis.Token{
Term: []byte("que"),
},
&analysis.Token{
Term: []byte("vem"),
},
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("public"),
},
&analysis.Token{
Term: []byte("parec"),
},
&analysis.Token{
Term: []byte("de"),
},
&analysis.Token{
Term: []byte("modo"),
},
&analysis.Token{
Term: []byte("nada"),
},
&analysis.Token{
Term: []byte("surpreendent"),
},
&analysis.Token{
Term: []byte("restrit"),
},
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("tema"),
},
&analysis.Token{
Term: []byte("menor"),
},
&analysis.Token{
Term: []byte("mas"),
},
&analysis.Token{
Term: []byte("há"),
},
&analysis.Token{
Term: []byte("evident"),
},
&analysis.Token{
Term: []byte("grand"),
},
&analysis.Token{
Term: []byte("questa"),
},
&analysis.Token{
Term: []byte("em"),
},
&analysis.Token{
Term: []byte("jogo"),
},
&analysis.Token{
Term: []byte("nas"),
},
&analysis.Token{
Term: []byte("eleica"),
},
&analysis.Token{
Term: []byte("que"),
},
&analysis.Token{
Term: []byte("se"),
},
&analysis.Token{
Term: []byte("aproximam"),
},
},
},
}
cache := registry.NewCache()
filter, err := cache.TokenFilterNamed(LightStemmerName)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/lang/pt/stop_filter_pt.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package pt
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Portuguese stop token filter. It
// fetches the token map named StopName from cache and wraps it in a
// stop-token filter; a failed lookup is handed back to the caller unchanged.
// The config argument is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/pt/stop_words_pt.go
================================================
package pt
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which this package registers both its
// Portuguese stop-word token map and the stop token filter built from it.
const StopName = "stop_pt"
// PortugueseStopWords is the raw Portuguese stop-word list that
// TokenMapConstructor loads into a token map. As the header inside the list
// states, comments begin with a vertical bar and each stop word is at the
// start of a line.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
//
// Deviation from the obtained content: the "forms of ter" section lists the
// misspelling "tém", while the ranked list comments out "têm" on the
// assumption that the forms section covers it. As a result "têm" was never a
// stop word. The correctly spelled "têm" has been added right after "tém";
// the original entry is kept so that nothing is removed from the list.
var PortugueseStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Portuguese stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| The following is a ranked list (commonest to rarest) of stopwords
| deriving from a large sample of text.
| Extra words have been added at the end.
de | of, from
a | the; to, at; her
o | the; him
que | who, that
e | and
do | de + o
da | de + a
em | in
um | a
para | for
| é from SER
com | with
não | not, no
uma | a
os | the; them
no | em + o
se | himself etc
na | em + a
por | for
mais | more
as | the; them
dos | de + os
como | as, like
mas | but
| foi from SER
ao | a + o
ele | he
das | de + as
| tem from TER
à | a + a
seu | his
sua | her
ou | or
| ser from SER
quando | when
muito | much
| há from HAV
nos | em + os; us
já | already, now
| está from EST
eu | I
também | also
só | only, just
pelo | per + o
pela | per + a
até | up to
isso | that
ela | he
entre | between
| era from SER
depois | after
sem | without
mesmo | same
aos | a + os
| ter from TER
seus | his
quem | whom
nas | em + as
me | me
esse | that
eles | they
| estão from EST
você | you
| tinha from TER
| foram from SER
essa | that
num | em + um
nem | nor
suas | her
meu | my
às | a + as
minha | my
| têm from TER
numa | em + uma
pelos | per + os
elas | they
| havia from HAV
| seja from SER
qual | which
| será from SER
nós | we
| tenho from TER
lhe | to him, her
deles | of them
essas | those
esses | those
pelas | per + as
este | this
| fosse from SER
dele | of him
| other words. There are many contractions such as naquele = em+aquele,
| mo = me+o, but they are rare.
| Indefinite article plural forms are also rare.
tu | thou
te | thee
vocês | you (plural)
vos | you
lhes | to them
meus | my
minhas
teu | thy
tua
teus
tuas
nosso | our
nossa
nossos
nossas
dela | of her
delas | of them
esta | this
estes | these
estas | these
aquele | that
aquela | that
aqueles | those
aquelas | those
isto | this
aquilo | that
| forms of estar, to be (not including the infinitive):
estou
está
estamos
estão
estive
esteve
estivemos
estiveram
estava
estávamos
estavam
estivera
estivéramos
esteja
estejamos
estejam
estivesse
estivéssemos
estivessem
estiver
estivermos
estiverem
| forms of haver, to have (not including the infinitive):
hei
há
havemos
hão
houve
houvemos
houveram
houvera
houvéramos
haja
hajamos
hajam
houvesse
houvéssemos
houvessem
houver
houvermos
houverem
houverei
houverá
houveremos
houverão
houveria
houveríamos
houveriam
| forms of ser, to be (not including the infinitive):
sou
somos
são
era
éramos
eram
fui
foi
fomos
foram
fora
fôramos
seja
sejamos
sejam
fosse
fôssemos
fossem
for
formos
forem
serei
será
seremos
serão
seria
seríamos
seriam
| forms of ter, to have (not including the infinitive):
tenho
tem
temos
tém
têm
tinha
tínhamos
tinham
tive
teve
tivemos
tiveram
tivera
tivéramos
tenha
tenhamos
tenham
tivesse
tivéssemos
tivessem
tiver
tivermos
tiverem
terei
terá
teremos
terão
teria
teríamos
teriam
`)
// TokenMapConstructor creates a fresh token map and loads
// PortugueseStopWords into it. The map is returned together with whatever
// error LoadBytes reports, so on failure the caller receives both. Neither
// config nor cache is consulted.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(PortugueseStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ro/analyzer_ro.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ro
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which this package registers
// AnalyzerConstructor with the registry.
const AnalyzerName = "ro"
// AnalyzerConstructor assembles the Romanian analyzer. It fetches the
// unicode tokenizer and then, in order, the lowercase, stop-word and Snowball
// stemmer token filters from cache, returning the first lookup error it
// encounters. The config argument is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filter names in the order they are looked up and stored in TokenFilters.
	filterNames := [...]string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, len(filterNames))
	for i, name := range filterNames {
		if filters[i], err = cache.TokenFilterNamed(name); err != nil {
			return nil, err
		}
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ro/analyzer_ro_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ro
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestRomanianAnalyzer runs the analyzer registered as AnalyzerName over a
// few inputs and checks the number of tokens produced and each token's term.
func TestRomanianAnalyzer(t *testing.T) {
	// terms builds an expected token stream. Only Term is populated because
	// the checks below compare nothing but stream length and each Term.
	terms := func(words ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(words))
		for _, w := range words {
			stream = append(stream, &analysis.Token{Term: []byte(w)})
		}
		return stream
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("absenţa"), output: terms("absenţ")},
		{input: []byte("absenţi"), output: terms("absenţ")},
		// stop word
		{input: []byte("îl"), output: terms()},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if want, have := len(tc.output), len(got); have != want {
			t.Fatalf("expected length: %d, got %d", want, have)
		}
		for idx := range got {
			wantTerm, gotTerm := tc.output[idx].Term, got[idx].Term
			if reflect.DeepEqual(gotTerm, wantTerm) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
		}
	}
}
================================================
FILE: analysis/lang/ro/stemmer_ro.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ro
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/romanian"
)
// SnowballStemmerName is the name under which this package registers
// RomanianStemmerFilterConstructor as a token filter.
const SnowballStemmerName = "stemmer_ro_snowball"
// RomanianStemmerFilter is a token filter that stems each token's term with
// the Snowball Romanian stemmer. It has no fields.
type RomanianStemmerFilter struct {
}
// NewRomanianStemmerFilter returns a pointer to a new, field-less
// RomanianStemmerFilter.
func NewRomanianStemmerFilter() *RomanianStemmerFilter {
	return new(RomanianStemmerFilter)
}
// Filter replaces the Term of every token in input with the output of the
// Snowball Romanian stemmer. Terms are overwritten on the tokens themselves
// and the same stream is returned.
func (s *RomanianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		env := snowballstem.NewEnv(string(tok.Term))
		romanian.Stem(env)
		tok.Term = []byte(env.Current())
	}
	return input
}
// RomanianStemmerFilterConstructor returns a new RomanianStemmerFilter and a
// nil error. Neither config nor cache is consulted.
func RomanianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewRomanianStemmerFilter()
	return stemmer, nil
}
// init registers RomanianStemmerFilterConstructor under SnowballStemmerName
// and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, RomanianStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ro/stop_filter_ro.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ro
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Romanian stop token filter. It
// fetches the token map named StopName from cache and wraps it in a
// stop-token filter; a failed lookup is handed back to the caller unchanged.
// The config argument is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ro/stop_words_ro.go
================================================
package ro
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which this package registers both its Romanian
// stop-word token map and the stop token filter built from it.
const StopName = "stop_ro"
// RomanianStopWords is the raw Romanian stop-word list that
// TokenMapConstructor loads into a token map. It holds one word per line;
// the leading lines starting with '#' carry attribution and licence text
// (presumably skipped as comments by LoadBytes - confirm).
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/
// ` was changed to ' to allow for literal string
var RomanianStopWords = []byte(`# This file was created by Jacques Savoy and is distributed under the BSD license.
# See http://members.unine.ch/jacques.savoy/clef/index.html.
# Also see http://www.opensource.org/licenses/bsd-license.html
acea
aceasta
această
aceea
acei
aceia
acel
acela
acele
acelea
acest
acesta
aceste
acestea
aceşti
aceştia
acolo
acum
ai
aia
aibă
aici
al
ăla
ale
alea
ălea
altceva
altcineva
am
ar
are
aş
aşadar
asemenea
asta
ăsta
astăzi
astea
ăstea
ăştia
asupra
aţi
au
avea
avem
aveţi
azi
bine
bucur
bună
ca
că
căci
când
care
cărei
căror
cărui
cât
câte
câţi
către
câtva
ce
cel
ceva
chiar
cînd
cine
cineva
cît
cîte
cîţi
cîtva
contra
cu
cum
cumva
curând
curînd
da
dă
dacă
dar
datorită
de
deci
deja
deoarece
departe
deşi
din
dinaintea
dintr
dintre
drept
după
ea
ei
el
ele
eram
este
eşti
eu
face
fără
fi
fie
fiecare
fii
fim
fiţi
iar
ieri
îi
îl
îmi
împotriva
în
înainte
înaintea
încât
încît
încotro
între
întrucât
întrucît
îţi
la
lângă
le
li
lîngă
lor
lui
mă
mâine
mea
mei
mele
mereu
meu
mi
mine
mult
multă
mulţi
ne
nicăieri
nici
nimeni
nişte
noastră
noastre
noi
noştri
nostru
nu
ori
oricând
oricare
oricât
orice
oricînd
oricine
oricît
oricum
oriunde
până
pe
pentru
peste
pînă
poate
pot
prea
prima
primul
prin
printr
sa
să
săi
sale
sau
său
se
şi
sînt
sîntem
sînteţi
spre
sub
sunt
suntem
sunteţi
ta
tăi
tale
tău
te
ţi
ţie
tine
toată
toate
tot
toţi
totuşi
tu
un
una
unde
undeva
unei
unele
uneori
unor
vă
vi
voastră
voastre
voi
voştri
vostru
vouă
vreo
vreun
`)
// TokenMapConstructor creates a fresh token map and loads RomanianStopWords
// into it. The map is returned together with whatever error LoadBytes
// reports, so on failure the caller receives both. Neither config nor cache
// is consulted.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(RomanianStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ru/analyzer_ru.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ru
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which this package registers
// AnalyzerConstructor with the registry.
const AnalyzerName = "ru"
// AnalyzerConstructor assembles the Russian analyzer. It fetches the unicode
// tokenizer and then, in order, the lowercase, stop-word and Snowball stemmer
// token filters from cache, returning the first lookup error it encounters.
// The config argument is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	unicodeTokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filter names in the order they are looked up and stored in TokenFilters.
	filterNames := [...]string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, len(filterNames))
	for i, name := range filterNames {
		if filters[i], err = cache.TokenFilterNamed(name); err != nil {
			return nil, err
		}
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    unicodeTokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ru/analyzer_ru_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ru
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestRussianAnalyzer runs the analyzer registered as AnalyzerName over a set
// of inputs and checks the number of tokens produced and each token's term.
func TestRussianAnalyzer(t *testing.T) {
	// terms builds an expected token stream. Only Term is populated because
	// the checks below compare nothing but stream length and each Term.
	terms := func(words ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(words))
		for _, w := range words {
			stream = append(stream, &analysis.Token{Term: []byte(w)})
		}
		return stream
	}

	cases := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("километрах"), output: terms("километр")},
		{input: []byte("актеров"), output: terms("актер")},
		// stop word
		{input: []byte("как"), output: terms()},
		// digits safe
		{input: []byte("text 1000"), output: terms("text", "1000")},
		{
			input:  []byte("Вместе с тем о силе электромагнитной энергии имели представление еще"),
			output: terms("вмест", "сил", "электромагнитн", "энерг", "имел", "представлен"),
		},
		{
			input:  []byte("Но знание это хранилось в тайне"),
			output: terms("знан", "эт", "хран", "тайн"),
		},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := analyzer.Analyze(tc.input)
		if want, have := len(tc.output), len(got); have != want {
			t.Fatalf("expected length: %d, got %d", want, have)
		}
		for idx := range got {
			wantTerm, gotTerm := tc.output[idx].Term, got[idx].Term
			if reflect.DeepEqual(gotTerm, wantTerm) {
				continue
			}
			t.Errorf("expected term %s (% x) got %s (% x)", wantTerm, wantTerm, gotTerm, gotTerm)
		}
	}
}
================================================
FILE: analysis/lang/ru/stemmer_ru.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ru
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/russian"
)
// SnowballStemmerName is the name under which this package registers
// RussianStemmerFilterConstructor as a token filter.
const SnowballStemmerName = "stemmer_ru_snowball"
// RussianStemmerFilter is a token filter that stems each token's term with
// the Snowball Russian stemmer. It has no fields.
type RussianStemmerFilter struct {
}
// NewRussianStemmerFilter returns a pointer to a new, field-less
// RussianStemmerFilter.
func NewRussianStemmerFilter() *RussianStemmerFilter {
	return new(RussianStemmerFilter)
}
// Filter replaces the Term of every token in input with the output of the
// Snowball Russian stemmer. Terms are overwritten on the tokens themselves
// and the same stream is returned.
func (s *RussianStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		env := snowballstem.NewEnv(string(tok.Term))
		russian.Stem(env)
		tok.Term = []byte(env.Current())
	}
	return input
}
// RussianStemmerFilterConstructor returns a new RussianStemmerFilter and a
// nil error. Neither config nor cache is consulted.
func RussianStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewRussianStemmerFilter()
	return stemmer, nil
}
// init registers RussianStemmerFilterConstructor under SnowballStemmerName
// and panics if the registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, RussianStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ru/stemmer_ru_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ru
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSnowballRussianStemmer feeds single-token streams through the filter
// registered as SnowballStemmerName and compares the result with the expected
// stemmed stream.
func TestSnowballRussianStemmer(t *testing.T) {
	// single wraps one term in a one-token stream with only Term set.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{input: single("актеров"), output: single("актер")},
		{input: single("километров"), output: single("километр")},
	}

	filter, err := registry.NewCache().TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		got := filter.Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/lang/ru/stop_filter_ru.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ru
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor builds the Russian stop token filter. It fetches
// the token map named StopName from cache and wraps it in a stop-token
// filter; a failed lookup is handed back to the caller unchanged. The config
// argument is not consulted.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	filter := stop.NewStopTokensFilter(stopWords)
	return filter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/ru/stop_words_ru.go
================================================
package ru
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name under which this package registers both its Russian
// stop-word token map and the stop token filter built from it.
const StopName = "stop_ru"
// RussianStopWords is the raw Russian stop-word list that TokenMapConstructor
// loads into a token map. As the header inside the list states, comments
// begin with a vertical bar and each stop word is at the start of a line; the
// trailing "some paradigms" section consists entirely of such comment lines.
//
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var RussianStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| a russian stop word list. comments begin with vertical bar. each stop
| word is at the start of a line.
| this is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
| letter 'ё' is translated to 'е'.
и | and
в | in/into
во | alternative form
не | not
что | what/that
он | he
на | on/onto
я | i
с | from
со | alternative form
как | how
а | milder form of 'no' (but)
то | conjunction and form of 'that'
все | all
она | she
так | so, thus
его | him
но | but
да | yes/and
ты | thou
к | towards, by
у | around, chez
же | intensifier particle
вы | you
за | beyond, behind
бы | conditional/subj. particle
по | up to, along
только | only
ее | her
мне | to me
было | it was
вот | here is/are, particle
от | away from
меня | me
еще | still, yet, more
нет | no, there isnt/arent
о | about
из | out of
ему | to him
теперь | now
когда | when
даже | even
ну | so, well
вдруг | suddenly
ли | interrogative particle
если | if
уже | already, but homonym of 'narrower'
или | or
ни | neither
быть | to be
был | he was
него | prepositional form of его
до | up to
вас | you accusative
нибудь | indef. suffix preceded by hyphen
опять | again
уж | already, but homonym of 'adder'
вам | to you
сказал | he said
ведь | particle 'after all'
там | there
потом | then
себя | oneself
ничего | nothing
ей | to her
может | usually with 'быть' as 'maybe'
они | they
тут | here
где | where
есть | there is/are
надо | got to, must
ней | prepositional form of ей
для | for
мы | we
тебя | thee
их | them, their
чем | than
была | she was
сам | self
чтоб | in order to
без | without
будто | as if
человек | man, person, one
чего | genitive form of 'what'
раз | once
тоже | also
себе | to oneself
под | beneath
жизнь | life
будет | will be
ж | short form of intensifer particle 'же'
тогда | then
кто | who
этот | this
говорил | was saying
того | genitive form of 'that'
потому | for that reason
этого | genitive form of 'this'
какой | which
совсем | altogether
ним | prepositional form of 'его', 'они'
здесь | here
этом | prepositional form of 'этот'
один | one
почти | almost
мой | my
тем | instrumental/dative plural of 'тот', 'то'
чтобы | full form of 'in order that'
нее | her (acc.)
кажется | it seems
сейчас | now
были | they were
куда | where to
зачем | why
сказать | to say
всех | all (acc., gen. preposn. plural)
никогда | never
сегодня | today
можно | possible, one can
при | by
наконец | finally
два | two
об | alternative form of 'о', about
другой | another
хоть | even
после | after
над | above
больше | more
тот | that one (masc.)
через | across, in
эти | these
нас | us
про | about
всего | in all, only, of all
них | prepositional form of 'они' (they)
какая | which, feminine
много | lots
разве | interrogative particle
сказала | she said
три | three
эту | this, acc. fem. sing.
моя | my, feminine
впрочем | moreover, besides
хорошо | good
свою | ones own, acc. fem. sing.
этой | oblique form of 'эта', fem. 'this'
перед | in front of
иногда | sometimes
лучше | better
чуть | a little
том | preposn. form of 'that one'
нельзя | one must not
такой | such a one
им | to them
более | more
всегда | always
конечно | of course
всю | acc. fem. sing of 'all'
между | between
| b: some paradigms
|
| personal pronouns
|
| я меня мне мной [мною]
| ты тебя тебе тобой [тобою]
| он его ему им [него, нему, ним]
| она ее эи ею [нее, нэи, нею]
| оно его ему им [него, нему, ним]
|
| мы нас нам нами
| вы вас вам вами
| они их им ими [них, ним, ними]
|
| себя себе собой [собою]
|
| demonstrative pronouns: этот (this), тот (that)
|
| этот эта это эти
| этого эты это эти
| этого этой этого этих
| этому этой этому этим
| этим этой этим [этою] этими
| этом этой этом этих
|
| тот та то те
| того ту то те
| того той того тех
| тому той тому тем
| тем той тем [тою] теми
| том той том тех
|
| determinative pronouns
|
| (a) весь (all)
|
| весь вся все все
| всего всю все все
| всего всей всего всех
| всему всей всему всем
| всем всей всем [всею] всеми
| всем всей всем всех
|
| (b) сам (himself etc)
|
| сам сама само сами
| самого саму само самих
| самого самой самого самих
| самому самой самому самим
| самим самой самим [самою] самими
| самом самой самом самих
|
| stems of verbs 'to be', 'to have', 'to do' and modal
|
| быть бы буд быв есть суть
| име
| дел
| мог мож мочь
| уме
| хоч хот
| долж
| можн
| нужн
| нельзя
`)
// TokenMapConstructor creates a fresh token map and loads RussianStopWords
// into it. The map is returned together with whatever error LoadBytes
// reports, so on failure the caller receives both. Neither config nor cache
// is consulted.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if loadErr := stopWords.LoadBytes(RussianStopWords); loadErr != nil {
		return stopWords, loadErr
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registry reports an error.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/sv/analyzer_sv.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sv
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the Swedish analyzer is registered.
const AnalyzerName = "sv"
// AnalyzerConstructor builds the Swedish analyzer from components looked up
// in cache: the unicode tokenizer and, listed in this order, the lowercase,
// Swedish stop word and Swedish snowball stemmer token filters. The first
// lookup that fails aborts construction and its error is returned unchanged.
// config is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are listed in the analyzer.
	filterNames := []string{lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/sv/analyzer_sv_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sv
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSwedishAnalyzer runs the registered Swedish analyzer over a few inputs
// and compares the resulting terms (and only the terms) with the expectation.
func TestSwedishAnalyzer(t *testing.T) {
	// terms builds an expected token stream carrying just the given terms.
	terms := func(values ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(values))
		for _, value := range values {
			stream = append(stream, &analysis.Token{Term: []byte(value)})
		}
		return stream
	}

	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("jaktkarlarne"), output: terms("jaktkarl")},
		{input: []byte("jaktkarlens"), output: terms("jaktkarl")},
		// stop word
		{input: []byte("och"), output: terms()},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if len(actual) != len(test.output) {
			t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
		}
		for i := range actual {
			got, want := actual[i].Term, test.output[i].Term
			if !reflect.DeepEqual(got, want) {
				t.Errorf("expected term %s (% x) got %s (% x)", want, want, got, got)
			}
		}
	}
}
================================================
FILE: analysis/lang/sv/stemmer_sv.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sv
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/swedish"
)
// SnowballStemmerName is the name under which the Swedish snowball stemmer token filter is registered.
const SnowballStemmerName = "stemmer_sv_snowball"
// SwedishStemmerFilter is a token filter that stems terms with the snowball
// Swedish stemmer. It carries no state.
type SwedishStemmerFilter struct{}

// NewSwedishStemmerFilter returns a new SwedishStemmerFilter.
func NewSwedishStemmerFilter() *SwedishStemmerFilter {
	return new(SwedishStemmerFilter)
}
// Filter replaces the Term of every token in input with its Swedish snowball
// stem. The tokens are modified in place and the same stream is returned.
func (s *SwedishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		env := snowballstem.NewEnv(string(input[i].Term))
		swedish.Stem(env)
		input[i].Term = []byte(env.Current())
	}
	return input
}
// SwedishStemmerFilterConstructor returns a new SwedishStemmerFilter as a
// token filter. Neither config nor cache is used and the error is always nil.
func SwedishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewSwedishStemmerFilter()
	return stemmer, nil
}
// init registers SwedishStemmerFilterConstructor under SnowballStemmerName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, SwedishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/sv/stop_filter_sv.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package sv
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor looks up the token map named StopName in cache
// and wraps it in a stop tokens filter. A failed lookup is returned as the
// error with a nil filter. config is not used.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/sv/stop_words_sv.go
================================================
package sv
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name used to register the Swedish stop word token map.
const StopName = "stop_sv"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var SwedishStopWords = []byte(` | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
| This file is distributed under the BSD License.
| See http://snowball.tartarus.org/license.php
| Also see http://www.opensource.org/licenses/bsd-license.html
| - Encoding was converted to UTF-8.
| - This notice was added.
|
| NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
| A Swedish stop word list. Comments begin with vertical bar. Each stop
| word is at the start of a line.
| This is a ranked list (commonest to rarest) of stopwords derived from
| a large text sample.
| Swedish stop words occasionally exhibit homonym clashes. For example
| så = so, but also seed. These are indicated clearly below.
och | and
det | it, this/that
att | to (with infinitive)
i | in, at
en | a
jag | I
hon | she
som | who, that
han | he
på | on
den | it, this/that
med | with
var | where, each
sig | him(self) etc
för | for
så | so (also: seed)
till | to
är | is
men | but
ett | a
om | if; around, about
hade | had
de | they, these/those
av | of
icke | not, no
mig | me
du | you
henne | her
då | then, when
sin | his
nu | now
har | have
inte | inte någon = no one
hans | his
honom | him
skulle | 'sake'
hennes | her
där | there
min | my
man | one (pronoun)
ej | nor
vid | at, by, on (also: vast)
kunde | could
något | some etc
från | from, off
ut | out
när | when
efter | after, behind
upp | up
vi | we
dem | them
vara | be
vad | what
över | over
än | than
dig | you
kan | can
sina | his
här | here
ha | have
mot | towards
alla | all
under | under (also: wonder)
någon | some etc
eller | or (else)
allt | all
mycket | much
sedan | since
ju | why
denna | this/that
själv | myself, yourself etc
detta | this/that
åt | to
utan | without
varit | was
hur | how
ingen | no
mitt | my
ni | you
bli | to be, become
blev | from bli
oss | us
din | thy
dessa | these/those
några | some etc
deras | their
blir | from bli
mina | my
samma | (the) same
vilken | who, that
er | you, your
sådan | such a
vår | our
blivit | from bli
dess | its
inom | within
mellan | between
sådant | such a
varför | why
varje | each
vilka | who, that
ditt | thy
vem | who
vilket | who, that
sitta | his
sådana | such a
vart | each
dina | thy
vars | whose
vårt | our
våra | our
ert | your
era | your
vilkas | whose
`)
// TokenMapConstructor creates a token map and loads SwedishStopWords into it.
// The config and cache arguments are not used. The map is returned together
// with whatever error LoadBytes reported.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(SwedishStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/tr/analyzer_tr.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/analysis/token/apostrophe"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
)
// AnalyzerName is the name under which the Turkish analyzer is registered.
const AnalyzerName = "tr"
// AnalyzerConstructor builds the Turkish analyzer from components looked up
// in cache: the unicode tokenizer and, listed in this order, the apostrophe,
// lowercase, Turkish stop word and Turkish snowball stemmer token filters.
// The first lookup that fails aborts construction and its error is returned
// unchanged. config is not consulted.
func AnalyzerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Analyzer, error) {
	tokenizer, err := cache.TokenizerNamed(unicode.Name)
	if err != nil {
		return nil, err
	}

	// Filters are resolved in the same order they are listed in the analyzer.
	filterNames := []string{apostrophe.Name, lowercase.Name, StopName, SnowballStemmerName}
	filters := make([]analysis.TokenFilter, 0, len(filterNames))
	for _, name := range filterNames {
		filter, err := cache.TokenFilterNamed(name)
		if err != nil {
			return nil, err
		}
		filters = append(filters, filter)
	}

	return &analysis.DefaultAnalyzer{
		Tokenizer:    tokenizer,
		TokenFilters: filters,
	}, nil
}
// init registers AnalyzerConstructor under AnalyzerName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterAnalyzer(AnalyzerName, AnalyzerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/tr/analyzer_tr_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestTurkishAnalyzer runs the registered Turkish analyzer over a few inputs
// and compares the resulting terms (and only the terms) with the expectation.
func TestTurkishAnalyzer(t *testing.T) {
	// terms builds an expected token stream carrying just the given terms.
	terms := func(values ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(values))
		for _, value := range values {
			stream = append(stream, &analysis.Token{Term: []byte(value)})
		}
		return stream
	}

	tests := []struct {
		input  []byte
		output analysis.TokenStream
	}{
		// stemming
		{input: []byte("ağacı"), output: terms("ağaç")},
		{input: []byte("ağaç"), output: terms("ağaç")},
		// stop word
		{input: []byte("dolayı"), output: terms()},
		// apostrophes
		{input: []byte("Kıbrıs'ta"), output: terms("kıbrıs")},
		{input: []byte("Van Gölü'ne"), output: terms("van", "göl")},
	}

	analyzer, err := registry.NewCache().AnalyzerNamed(AnalyzerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, test := range tests {
		actual := analyzer.Analyze(test.input)
		if len(actual) != len(test.output) {
			t.Fatalf("expected length: %d, got %d", len(test.output), len(actual))
		}
		for i := range actual {
			got, want := actual[i].Term, test.output[i].Term
			if !reflect.DeepEqual(got, want) {
				t.Errorf("expected term %s (% x) got %s (% x)", want, want, got, got)
			}
		}
	}
}
================================================
FILE: analysis/lang/tr/stemmer_tr.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowballstem"
"github.com/blevesearch/snowballstem/turkish"
)
// SnowballStemmerName is the name under which the Turkish snowball stemmer token filter is registered.
const SnowballStemmerName = "stemmer_tr_snowball"
// TurkishStemmerFilter is a token filter that stems terms with the snowball
// Turkish stemmer. It carries no state.
type TurkishStemmerFilter struct{}

// NewTurkishStemmerFilter returns a new TurkishStemmerFilter.
func NewTurkishStemmerFilter() *TurkishStemmerFilter {
	return new(TurkishStemmerFilter)
}
// Filter replaces the Term of every token in input with its Turkish snowball
// stem. The tokens are modified in place and the same stream is returned.
func (s *TurkishStemmerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		env := snowballstem.NewEnv(string(input[i].Term))
		turkish.Stem(env)
		input[i].Term = []byte(env.Current())
	}
	return input
}
// TurkishStemmerFilterConstructor returns a new TurkishStemmerFilter as a
// token filter. Neither config nor cache is used and the error is always nil.
func TurkishStemmerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stemmer := NewTurkishStemmerFilter()
	return stemmer, nil
}
// init registers TurkishStemmerFilterConstructor under SnowballStemmerName
// and panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(SnowballStemmerName, TurkishStemmerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/tr/stemmer_tr_test.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tr
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestSnowballTurkishStemmer feeds single-token streams through the
// registered Turkish snowball stemmer filter and checks the resulting stream.
func TestSnowballTurkishStemmer(t *testing.T) {
	// single wraps one term in a one-token stream with only Term set.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		term string // term handed to the filter
		stem string // term expected back
	}{
		{term: "kimsesizler", stem: "kimsesiz"},
		{term: "kitaplar", stem: "kitap"},
		{term: "arabanın", stem: "araba"},
		{term: "bardaklar", stem: "bardak"},
		{term: "kediye", stem: "kedi"},
		{term: "yazdım", stem: "yaz"},
	}

	filter, err := registry.NewCache().TokenFilterNamed(SnowballStemmerName)
	if err != nil {
		t.Fatal(err)
	}

	for _, tc := range cases {
		expected := single(tc.stem)
		actual := filter.Filter(single(tc.term))
		if !reflect.DeepEqual(actual, expected) {
			t.Errorf("expected %s, got %s", expected[0].Term, actual[0].Term)
		}
	}
}
================================================
FILE: analysis/lang/tr/stop_filter_tr.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package tr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/token/stop"
"github.com/blevesearch/bleve/v2/registry"
)
// StopTokenFilterConstructor looks up the token map named StopName in cache
// and wraps it in a stop tokens filter. A failed lookup is returned as the
// error with a nil filter. config is not used.
func StopTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	stopWords, lookupErr := cache.TokenMapNamed(StopName)
	if lookupErr != nil {
		return nil, lookupErr
	}
	stopFilter := stop.NewStopTokensFilter(stopWords)
	return stopFilter, nil
}
// init registers StopTokenFilterConstructor under StopName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(StopName, StopTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/lang/tr/stop_words_tr.go
================================================
package tr
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// StopName is the name used to register the Turkish stop word token map.
const StopName = "stop_tr"
// this content was obtained from:
// lucene-4.7.2/analysis/common/src/resources/org/apache/lucene/analysis/snowball/
// ` was changed to ' to allow for literal string
var TurkishStopWords = []byte(`# Turkish stopwords from LUCENE-559
# merged with the list from "Information Retrieval on Turkish Texts"
# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
acaba
altmış
altı
ama
ancak
arada
aslında
ayrıca
bana
bazı
belki
ben
benden
beni
benim
beri
beş
bile
bin
bir
birçok
biri
birkaç
birkez
birşey
birşeyi
biz
bize
bizden
bizi
bizim
böyle
böylece
bu
buna
bunda
bundan
bunlar
bunları
bunların
bunu
bunun
burada
çok
çünkü
da
daha
dahi
de
defa
değil
diğer
diye
doksan
dokuz
dolayı
dolayısıyla
dört
edecek
eden
ederek
edilecek
ediliyor
edilmesi
ediyor
eğer
elli
en
etmesi
etti
ettiği
ettiğini
gibi
göre
halen
hangi
hatta
hem
henüz
hep
hepsi
her
herhangi
herkesin
hiç
hiçbir
için
iki
ile
ilgili
ise
işte
itibaren
itibariyle
kadar
karşın
katrilyon
kendi
kendilerine
kendini
kendisi
kendisine
kendisini
kez
ki
kim
kimden
kime
kimi
kimse
kırk
milyar
milyon
mu
mü
mı
nasıl
ne
neden
nedenle
nerde
nerede
nereye
niye
niçin
o
olan
olarak
oldu
olduğu
olduğunu
olduklarını
olmadı
olmadığı
olmak
olması
olmayan
olmaz
olsa
olsun
olup
olur
olursa
oluyor
on
ona
ondan
onlar
onlardan
onları
onların
onu
onun
otuz
oysa
öyle
pek
rağmen
sadece
sanki
sekiz
seksen
sen
senden
seni
senin
siz
sizden
sizi
sizin
şey
şeyden
şeyi
şeyler
şöyle
şu
şuna
şunda
şundan
şunları
şunu
tarafından
trilyon
tüm
üç
üzere
var
vardı
ve
veya
ya
yani
yapacak
yapılan
yapılması
yapıyor
yapmak
yaptı
yaptığı
yaptığını
yaptıkları
yedi
yerine
yetmiş
yine
yirmi
yoksa
yüz
zaten
`)
// TokenMapConstructor creates a token map and loads TurkishStopWords into it.
// The config and cache arguments are not used. The map is returned together
// with whatever error LoadBytes reported.
func TokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	stopWords := analysis.NewTokenMap()
	if err := stopWords.LoadBytes(TurkishStopWords); err != nil {
		return stopWords, err
	}
	return stopWords, nil
}
// init registers TokenMapConstructor under StopName and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenMap(StopName, TokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/test_words.txt
================================================
# full line comment
marty
steve # trailing comment
| different format of comment
dustin
siri | different style trailing comment
multiple words with different whitespace
================================================
FILE: analysis/token/apostrophe/apostrophe.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package apostrophe
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the apostrophe token filter is registered.
const Name = "apostrophe"
// RightSingleQuotationMark is the typographic apostrophe character.
const RightSingleQuotationMark = "’"
// Apostrophe is the plain ASCII apostrophe character.
const Apostrophe = "'"
// Apostrophes is the set of characters ApostropheFilter searches for; it is
// passed to bytes.IndexAny as the character set.
const Apostrophes = Apostrophe + RightSingleQuotationMark
// ApostropheFilter is a token filter that truncates each term at its first
// apostrophe. It carries no state.
type ApostropheFilter struct{}

// NewApostropheFilter returns a new ApostropheFilter.
func NewApostropheFilter() *ApostropheFilter {
	return new(ApostropheFilter)
}
// Filter cuts every term in input at the first character contained in
// Apostrophes, dropping that character and everything after it. Terms without
// such a character are left untouched. Tokens are modified in place (only
// Term is changed) and the same stream is returned.
func (s *ApostropheFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		term := input[i].Term
		if cut := bytes.IndexAny(term, Apostrophes); cut >= 0 {
			// keep only what precedes the apostrophe
			input[i].Term = term[:cut]
		}
	}
	return input
}
// ApostropheFilterConstructor returns a new ApostropheFilter as a token
// filter. Neither config nor cache is used and the error is always nil.
func ApostropheFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewApostropheFilter()
	return filter, nil
}
// init registers ApostropheFilterConstructor under Name and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, ApostropheFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/apostrophe/apostrophe_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package apostrophe
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestApostropheFilter feeds single-token streams through a fresh
// ApostropheFilter and checks that terms are cut at the apostrophe, or left
// unchanged when they contain none.
func TestApostropheFilter(t *testing.T) {
	// single wraps one term in a one-token stream with only Term set.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		term string // term handed to the filter
		want string // term expected back
	}{
		{term: "Türkiye'de", want: "Türkiye"},
		{term: "2003'te", want: "2003"},
		{term: "Van", want: "Van"},
		{term: "Gölü'nü", want: "Gölü"},
		{term: "gördüm", want: "gördüm"},
	}

	for _, tc := range cases {
		expected := single(tc.want)
		actual := NewApostropheFilter().Filter(single(tc.term))
		if !reflect.DeepEqual(actual, expected) {
			t.Errorf("expected %s, got %s", expected[0].Term, actual[0].Term)
		}
	}
}
================================================
FILE: analysis/token/camelcase/camelcase.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"bytes"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the camel case token filter is registered.
const Name = "camelCase"
// CamelCaseFilter splits a given token into a set of tokens where each resulting token
// falls into one of the following classes:
// 1. Upper case followed by lower case letters.
// Terminated by a number, an upper case letter, or a non alpha-numeric symbol.
// 2. Upper case followed by upper case letters.
// Terminated by a number, an upper case followed by a lower case letter, or a non alpha-numeric symbol.
// 3. Lower case followed by lower case letters.
// Terminated by a number, an upper case letter, or a non alpha-numeric symbol.
// 4. Number followed by numbers.
// Terminated by a letter or a non alpha-numeric symbol.
// 5. Non alpha-numeric symbol followed by non alpha-numeric symbols.
// Terminated by a number or a letter.
//
// It does a one-time sequential pass over an input token, from left to right.
// The scan is greedy and generates the longest substring that fits into one of the classes.
//
// See the test file for examples of classes and their parsings.
type CamelCaseFilter struct{}
// NewCamelCaseFilter returns a new CamelCaseFilter.
func NewCamelCaseFilter() *CamelCaseFilter {
	return new(CamelCaseFilter)
}
// Filter splits every token of input into its camel case parts and returns
// the parts as a new stream. Each input token gets its own Parser, seeded
// with the token's Start offset; positions are numbered consecutively from 1
// across the whole output, independent of the input tokens' positions.
func (f *CamelCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	out := make(analysis.TokenStream, 0, len(input))
	position := 1
	for _, tok := range input {
		n := utf8.RuneCount(tok.Term)
		symbols := bytes.Runes(tok.Term)
		parser := NewParser(n, position, tok.Start)
		for i := 0; i < n; i++ {
			// peek is the symbol after the current one; nil on the last symbol.
			var peek *rune
			if i+1 < n {
				peek = &symbols[i+1]
			}
			parser.Push(symbols[i], peek)
		}
		out = append(out, parser.FlushTokens()...)
		// continue numbering where this token's parts ended
		position = parser.NextPosition()
	}
	return out
}
// CamelCaseFilterConstructor returns a new CamelCaseFilter as a token filter.
// Neither config nor cache is used and the error is always nil.
func CamelCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	filter := NewCamelCaseFilter()
	return filter, nil
}
// init registers CamelCaseFilterConstructor under Name and panics if the
// registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, CamelCaseFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/camelcase/camelcase_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestCamelCaseFilter checks the full output stream (terms, positions and
// offsets, as produced by the tokenStream helper) of a fresh CamelCaseFilter
// for a range of inputs.
func TestCamelCaseFilter(t *testing.T) {
	type testCase struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}
	cases := []testCase{
		{tokenStream(""), tokenStream("")},
		{tokenStream("a"), tokenStream("a")},
		{
			tokenStream("...aMACMac123macILoveGolang"),
			tokenStream("...", "a", "MAC", "Mac", "123", "mac", "I", "Love", "Golang"),
		},
		{tokenStream("Lang"), tokenStream("Lang")},
		{tokenStream("GLang"), tokenStream("G", "Lang")},
		{tokenStream("GOLang"), tokenStream("GO", "Lang")},
		{tokenStream("GOOLang"), tokenStream("GOO", "Lang")},
		{tokenStream("1234"), tokenStream("1234")},
		{tokenStream("starbucks"), tokenStream("starbucks")},
		{
			tokenStream("Starbucks TVSamsungIsGREAT000"),
			tokenStream("Starbucks", " ", "TV", "Samsung", "Is", "GREAT", "000"),
		},
	}

	for i := range cases {
		tc := cases[i]
		got := NewCamelCaseFilter().Filter(tc.input)
		if !reflect.DeepEqual(got, tc.output) {
			t.Errorf("expected %s \n\n got %s", tc.output, got)
		}
	}
}
// tokenStream builds a token stream with one token per term. Positions start
// at 1; each token's Start is the previous token's End (0 for the first) and
// its End is Start plus the byte length of the term.
func tokenStream(termStrs ...string) analysis.TokenStream {
	stream := make(analysis.TokenStream, len(termStrs))
	offset := 0
	for i, term := range termStrs {
		end := offset + len(term)
		stream[i] = &analysis.Token{
			Term:     []byte(term),
			Position: i + 1,
			Start:    offset,
			End:      end,
		}
		offset = end
	}
	return stream
}
================================================
FILE: analysis/token/camelcase/parser.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"github.com/blevesearch/bleve/v2/analysis"
)
// buildTokenFromTerm turns the accumulated runes into a token placed at the
// parser's current position and index, then advances the position by one and
// the index by the length of the built term.
func (p *Parser) buildTokenFromTerm(buffer []rune) *analysis.Token {
	term := analysis.BuildTermFromRunes(buffer)
	start := p.index
	end := start + len(term)
	built := &analysis.Token{
		Term:     term,
		Position: p.position,
		Start:    start,
		End:      end,
	}
	p.index = end
	p.position++
	return built
}
// Parser accepts a symbol and passes it to the current state (representing a class).
// The state can accept it (and accumulate it). Otherwise, the parser creates a new state that
// starts with the pushed symbol.
//
// Parser accumulates a new resulting token every time it switches state.
// Use FlushTokens() to get the results after the last symbol was pushed.
type Parser struct {
// bufferLen is the capacity given to every freshly allocated buffer.
bufferLen int
// buffer holds the symbols accumulated for the current state.
buffer []rune
// current is the state pushed symbols are tested against; nil until the first Push.
current State
// tokens collects the tokens built so far.
tokens []*analysis.Token
// position is the Position assigned to the next token built.
position int
// index is the Start offset assigned to the next token built; it advances by the length of each built term.
index int
}
// NewParser returns a Parser whose rune buffer and token list are both
// pre-sized to hold length entries. position is the Position given to the
// first token produced and index its Start offset.
func NewParser(length, position, index int) *Parser {
	p := new(Parser)
	p.bufferLen = length
	p.buffer = make([]rune, 0, length)
	p.tokens = make([]*analysis.Token, 0, length)
	p.position = position
	p.index = index
	return p
}
// Push feeds the next symbol to the parser. peek is handed to the current
// state's Member check together with sym. When the current state rejects
// sym, the buffered symbols are converted into a token and a new state is
// started with sym as its first symbol.
func (p *Parser) Push(sym rune, peek *rune) {
	switch {
	case p.current == nil:
		// the start of parsing
		p.current = p.NewState(sym)
	case p.current.Member(sym, peek):
		// same state, sym is simply accumulated below
	default:
		// the old state is no more, thus convert the buffer
		p.tokens = append(p.tokens, p.buildTokenFromTerm(p.buffer))
		// let the new state begin with a fresh buffer
		p.current = p.NewState(sym)
		p.buffer = make([]rune, 0, p.bufferLen)
	}
	p.buffer = append(p.buffer, sym)
}
// NewState returns a fresh state for the class that sym starts. The lower
// case, upper case and number states are tried in that order; a symbol that
// starts none of them gets the non alpha-numeric state.
//
// Note. States have to have different starting symbols.
func (p *Parser) NewState(sym rune) State {
	if lower := (&LowerCaseState{}); lower.StartSym(sym) {
		return lower
	}
	if upper := (&UpperCaseState{}); upper.StartSym(sym) {
		return upper
	}
	if number := (&NumberCaseState{}); number.StartSym(sym) {
		return number
	}
	return &NonAlphaNumericCaseState{}
}
// FlushTokens converts whatever is currently buffered into one final token,
// appends it to the collected tokens and returns the whole list. A token is
// appended even when the buffer is empty.
func (p *Parser) FlushTokens() []*analysis.Token {
	last := p.buildTokenFromTerm(p.buffer)
	p.tokens = append(p.tokens, last)
	return p.tokens
}
// NextPosition returns the Position that the next token built by this parser
// would receive.
func (p *Parser) NextPosition() int {
return p.position
}
================================================
FILE: analysis/token/camelcase/states.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package camelcase
import (
"unicode"
)
// States codify the classes that the parser recognizes.
type State interface {
// StartSym reports whether sym can be the first character of this class.
StartSym(sym rune) bool
// Member reports whether sym is a member of this class.
// peek, the next sym on the tape, can also be used to determine a class;
// it may be nil.
Member(sym rune, peek *rune) bool
}
// LowerCaseState is the class of lower-case letters.
type LowerCaseState struct{}

// Member reports whether sym is a lower-case letter; peek is ignored.
func (s *LowerCaseState) Member(sym rune, peek *rune) bool {
	isLower := unicode.IsLower(sym)
	return isLower
}

// StartSym reports whether sym is a lower-case letter.
func (s *LowerCaseState) StartSym(sym rune) bool {
	return unicode.IsLower(sym)
}
// UpperCaseState is the class opened by an upper-case letter. The first
// symbol offered to Member decides whether the state then collects only
// upper-case or only lower-case letters.
type UpperCaseState struct {
	startedCollecting bool // set once Member has accepted its first symbol
	collectingUpper   bool // case of that first accepted symbol: true for upper case
}

// Member reports whether sym continues this class.
//
// Non-letters are always rejected. An upper-case sym that is directly
// followed by a lower-case peek is rejected as well. The first accepted
// symbol fixes the case being collected; afterwards only symbols of that
// same case are accepted.
func (s *UpperCaseState) Member(sym rune, peek *rune) bool {
	isUpper := unicode.IsUpper(sym)
	if !isUpper && !unicode.IsLower(sym) {
		return false
	}
	if isUpper && peek != nil && unicode.IsLower(*peek) {
		return false
	}
	if s.startedCollecting {
		return isUpper == s.collectingUpper
	}
	// first accepted symbol: remember which case is being collected
	s.startedCollecting, s.collectingUpper = true, isUpper
	return true
}

// StartSym reports whether sym is an upper-case letter.
func (s *UpperCaseState) StartSym(sym rune) bool {
	isUpper := unicode.IsUpper(sym)
	return isUpper
}
// NumberCaseState is the class of symbols that unicode.IsNumber accepts.
type NumberCaseState struct{}

// Member reports whether sym is a number symbol; peek is ignored.
func (s *NumberCaseState) Member(sym rune, peek *rune) bool {
	isNumber := unicode.IsNumber(sym)
	return isNumber
}

// StartSym reports whether sym is a number symbol.
func (s *NumberCaseState) StartSym(sym rune) bool {
	return unicode.IsNumber(sym)
}
// NonAlphaNumericCaseState is the class of every symbol that is neither a
// lower-case letter, an upper-case letter nor a number.
type NonAlphaNumericCaseState struct{}

// Member reports whether sym belongs to none of the other classes; peek is
// ignored.
func (s *NonAlphaNumericCaseState) Member(sym rune, peek *rune) bool {
	alphaNumeric := unicode.IsLower(sym) || unicode.IsUpper(sym) || unicode.IsNumber(sym)
	return !alphaNumeric
}

// StartSym reports whether sym belongs to none of the other classes.
func (s *NonAlphaNumericCaseState) StartSym(sym rune) bool {
	return s.Member(sym, nil)
}
================================================
FILE: analysis/token/compound/dict.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compound
import (
"bytes"
"fmt"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the dictionary compound filter is registered.
const Name = "dict_compound"

// Defaults used by DictionaryCompoundFilterConstructor for settings that are
// absent from the configuration.
const (
	defaultMinWordSize      = 5
	defaultMinSubWordSize   = 2
	defaultMaxSubWordSize   = 15
	defaultOnlyLongestMatch = false
)
// DictionaryCompoundFilter adds, next to every sufficiently long token, the
// sub-words of that token that are present in a dictionary.
type DictionaryCompoundFilter struct {
	dict             analysis.TokenMap // dictionary of known sub-words
	minWordSize      int               // tokens with fewer runes are not decomposed
	minSubWordSize   int               // smallest sub-word length (in runes) that is looked up
	maxSubWordSize   int               // largest sub-word length (in runes) that is looked up
	onlyLongestMatch bool              // keep only the longest match per start position
}

// NewDictionaryCompoundFilter builds a DictionaryCompoundFilter from the given
// dictionary and size limits; all sizes are counted in runes.
func NewDictionaryCompoundFilter(dict analysis.TokenMap, minWordSize, minSubWordSize, maxSubWordSize int, onlyLongestMatch bool) *DictionaryCompoundFilter {
	f := new(DictionaryCompoundFilter)
	f.dict = dict
	f.minWordSize = minWordSize
	f.minSubWordSize = minSubWordSize
	f.maxSubWordSize = maxSubWordSize
	f.onlyLongestMatch = onlyLongestMatch
	return f
}
// Filter copies every input token to the output and, for tokens of at least
// minWordSize runes, appends the dictionary sub-words found inside the token
// directly after it.
func (f *DictionaryCompoundFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0, len(input))
	for _, token := range input {
		rv = append(rv, token)
		if utf8.RuneCount(token.Term) < f.minWordSize {
			// too short to be treated as a compound
			continue
		}
		rv = append(rv, f.decompose(token)...)
	}
	return rv
}
// decompose returns the dictionary sub-words contained in token.Term.
//
// Every window of minSubWordSize..maxSubWordSize runes, at every start rune,
// is looked up in the dictionary. Without onlyLongestMatch each hit yields a
// token; with it, only the longest hit per start rune is kept. The new tokens
// inherit Position, Type and KeyWord from token.
//
// Start and End of the new tokens are byte offsets: the byte offset of the
// window inside token.Term is added to token.Start, so that terms containing
// multi-byte runes get correct offsets. (This assumes token.Term still lines
// up byte for byte with the text token.Start refers to.)
//
// A minSubWordSize below 1 is treated as 1; empty or negative-length windows
// are never looked up.
func (f *DictionaryCompoundFilter) decompose(token *analysis.Token) []*analysis.Token {
	runes := bytes.Runes(token.Term)
	rlen := len(runes)

	// byteOffsets[k] is the byte offset inside token.Term at which rune k
	// starts; the extra last entry is the length of the term in bytes.
	byteOffsets := make([]int, 0, rlen+1)
	for pos := 0; pos < len(token.Term); {
		byteOffsets = append(byteOffsets, pos)
		_, size := utf8.DecodeRune(token.Term[pos:])
		pos += size
	}
	byteOffsets = append(byteOffsets, len(token.Term))

	minSize := f.minSubWordSize
	if minSize < 1 {
		minSize = 1
	}

	rv := make([]*analysis.Token, 0)
	for i := 0; i+minSize <= rlen; i++ {
		var longestMatchToken *analysis.Token
		for j := minSize; j <= f.maxSubWordSize; j++ {
			if i+j > rlen {
				break
			}
			sub := string(runes[i : i+j])
			if _, inDict := f.dict[sub]; !inDict {
				continue
			}
			newtoken := &analysis.Token{
				Term:     []byte(sub),
				Position: token.Position,
				Start:    token.Start + byteOffsets[i],
				End:      token.Start + byteOffsets[i+j],
				Type:     token.Type,
				KeyWord:  token.KeyWord,
			}
			if f.onlyLongestMatch {
				// j only grows, so a later hit is always the longer one
				longestMatchToken = newtoken
			} else {
				rv = append(rv, newtoken)
			}
		}
		if longestMatchToken != nil {
			rv = append(rv, longestMatchToken)
		}
	}
	return rv
}
// DictionaryCompoundFilterConstructor builds a DictionaryCompoundFilter from
// a configuration map.
//
// "dict_token_map" (string) is required and names the token map used as the
// dictionary. "min_word_size", "min_subword_size" and "max_subword_size" are
// read when they hold a float64, "only_longest_match" when it holds a bool;
// otherwise the package defaults apply.
func DictionaryCompoundFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	// intSetting returns the float64 stored under key as an int, or fallback
	// when the key is absent or holds another type.
	intSetting := func(key string, fallback int) int {
		if v, ok := config[key].(float64); ok {
			return int(v)
		}
		return fallback
	}

	minWordSize := intSetting("min_word_size", defaultMinWordSize)
	minSubWordSize := intSetting("min_subword_size", defaultMinSubWordSize)
	maxSubWordSize := intSetting("max_subword_size", defaultMaxSubWordSize)

	onlyLongestMatch := defaultOnlyLongestMatch
	if v, ok := config["only_longest_match"].(bool); ok {
		onlyLongestMatch = v
	}

	mapName, ok := config["dict_token_map"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify dict_token_map")
	}
	dict, err := cache.TokenMapNamed(mapName)
	if err != nil {
		return nil, fmt.Errorf("error building dict compound words filter: %v", err)
	}
	return NewDictionaryCompoundFilter(dict, minWordSize, minSubWordSize, maxSubWordSize, onlyLongestMatch), nil
}
// init registers the dictionary compound filter constructor under Name and
// panics when the registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, DictionaryCompoundFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/compound/dict_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package compound
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenmap"
"github.com/blevesearch/bleve/v2/registry"
)
// TestStopWordsFilter checks that the dict_compound filter keeps every input
// token and appends the dictionary sub-words "soft" and "ball" after
// "softball", with offsets relative to the original token.
//
// NOTE(review): despite its name this test exercises the dictionary compound
// filter, not a stop-words filter; the name looks like a copy-paste leftover.
func TestStopWordsFilter(t *testing.T) {
inputTokenStream := analysis.TokenStream{
&analysis.Token{
Term: []byte("i"),
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("like"),
Start: 2,
End: 6,
Position: 2,
},
&analysis.Token{
Term: []byte("to"),
Start: 7,
End: 9,
Position: 3,
},
&analysis.Token{
Term: []byte("play"),
Start: 10,
End: 14,
Position: 4,
},
&analysis.Token{
Term: []byte("softball"),
Start: 15,
End: 23,
Position: 5,
},
}
// only "softball" reaches the default minimum word size, so it is the only
// token that gets decomposed
expectedTokenStream := analysis.TokenStream{
&analysis.Token{
Term: []byte("i"),
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("like"),
Start: 2,
End: 6,
Position: 2,
},
&analysis.Token{
Term: []byte("to"),
Start: 7,
End: 9,
Position: 3,
},
&analysis.Token{
Term: []byte("play"),
Start: 10,
End: 14,
Position: 4,
},
&analysis.Token{
Term: []byte("softball"),
Start: 15,
End: 23,
Position: 5,
},
&analysis.Token{
Term: []byte("soft"),
Start: 15,
End: 19,
Position: 5,
},
&analysis.Token{
Term: []byte("ball"),
Start: 19,
End: 23,
Position: 5,
},
}
cache := registry.NewCache()
dictListConfig := map[string]interface{}{
"type": tokenmap.Name,
"tokens": []interface{}{"factor", "soft", "ball", "team"},
}
_, err := cache.DefineTokenMap("dict_test", dictListConfig)
if err != nil {
t.Fatal(err)
}
dictConfig := map[string]interface{}{
"type": "dict_compound",
"dict_token_map": "dict_test",
}
dictFilter, err := cache.DefineTokenFilter("dict_test", dictConfig)
if err != nil {
t.Fatal(err)
}
ouputTokenStream := dictFilter.Filter(inputTokenStream)
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
}
}
// TestStopWordsFilterLongestMatch checks the "only_longest_match" option of
// the dict_compound filter: with both "soft" and "softest" in the dictionary,
// only "softest" is emitted for the start of "softestball".
//
// NOTE(review): as with TestStopWordsFilter, the name does not match the
// filter under test.
func TestStopWordsFilterLongestMatch(t *testing.T) {
inputTokenStream := analysis.TokenStream{
&analysis.Token{
Term: []byte("softestball"),
Start: 0,
End: 11,
Position: 1,
},
}
expectedTokenStream := analysis.TokenStream{
&analysis.Token{
Term: []byte("softestball"),
Start: 0,
End: 11,
Position: 1,
},
&analysis.Token{
Term: []byte("softest"),
Start: 0,
End: 7,
Position: 1,
},
&analysis.Token{
Term: []byte("ball"),
Start: 7,
End: 11,
Position: 1,
},
}
cache := registry.NewCache()
dictListConfig := map[string]interface{}{
"type": tokenmap.Name,
"tokens": []interface{}{"soft", "softest", "ball"},
}
_, err := cache.DefineTokenMap("dict_test", dictListConfig)
if err != nil {
t.Fatal(err)
}
dictConfig := map[string]interface{}{
"type": "dict_compound",
"dict_token_map": "dict_test",
"only_longest_match": true,
}
dictFilter, err := cache.DefineTokenFilter("dict_test", dictConfig)
if err != nil {
t.Fatal(err)
}
ouputTokenStream := dictFilter.Filter(inputTokenStream)
if !reflect.DeepEqual(ouputTokenStream, expectedTokenStream) {
t.Errorf("expected %#v got %#v", expectedTokenStream, ouputTokenStream)
}
}
================================================
FILE: analysis/token/edgengram/edgengram.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package edgengram
import (
"bytes"
"fmt"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the edge n-gram filter is registered.
const Name = "edge_ngram"

// Side selects the end of a term from which n-grams are taken.
type Side bool

const (
	// BACK takes the n-grams from the end of the term.
	BACK Side = true
	// FRONT takes the n-grams from the start of the term.
	FRONT Side = false
)

// EdgeNgramFilter replaces every token by n-grams anchored at one edge of
// its term.
type EdgeNgramFilter struct {
	back      Side // BACK or FRONT
	minLength int  // smallest n-gram size, in runes
	maxLength int  // largest n-gram size, in runes
}

// NewEdgeNgramFilter creates an EdgeNgramFilter that emits the n-grams of
// minLength..maxLength runes taken from the given side of each term.
func NewEdgeNgramFilter(side Side, minLength, maxLength int) *EdgeNgramFilter {
	f := new(EdgeNgramFilter)
	f.back = side
	f.minLength = minLength
	f.maxLength = maxLength
	return f
}
// Filter replaces every input token by its edge n-grams.
//
// For each size from minLength to maxLength that does not exceed the number
// of runes in the term, one token is emitted whose term is the first (FRONT)
// or last (BACK) runes of the original term. Position, Start, End and Type
// are copied unchanged from the original token.
func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0, len(input))
	for _, token := range input {
		runeCount := utf8.RuneCount(token.Term)
		runes := bytes.Runes(token.Term)
		for ngramSize := s.minLength; ngramSize <= s.maxLength; ngramSize++ {
			if ngramSize > runeCount {
				// the term is too short for an n-gram of this size
				continue
			}
			var window []rune
			if s.back {
				window = runes[runeCount-ngramSize : runeCount]
			} else {
				window = runes[0:ngramSize]
			}
			rv = append(rv, &analysis.Token{
				Term:     analysis.BuildTermFromRunes(window),
				Type:     token.Type,
				Position: token.Position,
				Start:    token.Start,
				End:      token.End,
			})
		}
	}
	return rv
}
// EdgeNgramFilterConstructor builds an EdgeNgramFilter from a configuration
// map.
//
// "min" and "max" are required and must hold float64 values. "back" is
// optional; n-grams are taken from the end of the term only when it holds
// the bool true.
func EdgeNgramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	minVal, ok := config["min"].(float64)
	if !ok {
		return nil, fmt.Errorf("must specify min")
	}
	maxVal, ok := config["max"].(float64)
	if !ok {
		return nil, fmt.Errorf("must specify max")
	}

	side := FRONT
	if back, _ := config["back"].(bool); back {
		side = BACK
	}
	return NewEdgeNgramFilter(side, int(minVal), int(maxVal)), nil
}
// init registers the edge n-gram filter constructor under Name and panics
// when the registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, EdgeNgramFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/edgengram/edgengram_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package edgengram
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestEdgeNgramFilter runs the edge n-gram filter over a table of cases
// covering both sides (FRONT and BACK), single and ranged n-gram sizes, and
// more than one input token.
func TestEdgeNgramFilter(t *testing.T) {
tests := []struct {
side Side
min int
max int
input analysis.TokenStream
output analysis.TokenStream
}{
// single n-gram size, one case per side
{
side: FRONT,
min: 1,
max: 1,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abcde"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
},
},
{
side: BACK,
min: 1,
max: 1,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abcde"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("e"),
},
},
},
// range of n-gram sizes, one case per side
{
side: FRONT,
min: 1,
max: 3,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abcde"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("ab"),
},
&analysis.Token{
Term: []byte("abc"),
},
},
},
{
side: BACK,
min: 1,
max: 3,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abcde"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("e"),
},
&analysis.Token{
Term: []byte("de"),
},
&analysis.Token{
Term: []byte("cde"),
},
},
},
// two input tokens: the n-grams of the first precede those of the second
{
side: FRONT,
min: 1,
max: 3,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("abcde"),
},
&analysis.Token{
Term: []byte("vwxyz"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("ab"),
},
&analysis.Token{
Term: []byte("abc"),
},
&analysis.Token{
Term: []byte("v"),
},
&analysis.Token{
Term: []byte("vw"),
},
&analysis.Token{
Term: []byte("vwx"),
},
},
},
// largest n-gram size equals the term length
{
side: BACK,
min: 3,
max: 5,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Beryl"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ryl"),
},
&analysis.Token{
Term: []byte("eryl"),
},
&analysis.Token{
Term: []byte("Beryl"),
},
},
},
{
side: FRONT,
min: 3,
max: 5,
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("Beryl"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("Ber"),
},
&analysis.Token{
Term: []byte("Bery"),
},
&analysis.Token{
Term: []byte("Beryl"),
},
},
},
}
for _, test := range tests {
edgeNgramFilter := NewEdgeNgramFilter(test.side, test.min, test.max)
actual := edgeNgramFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
}
}
================================================
FILE: analysis/token/elision/elision.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package elision
import (
"fmt"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
const (
	// Name is the name under which the elision filter is registered.
	Name = "elision"
	// RightSingleQuotationMark is the typographic apostrophe (U+2019).
	RightSingleQuotationMark = '’'
	// Apostrophe is the ASCII apostrophe.
	Apostrophe = '\''
)
// ElisionFilter strips a leading article, together with the apostrophe that
// follows it, from token terms.
type ElisionFilter struct {
	articles analysis.TokenMap // articles that may be stripped
}

// NewElisionFilter creates an ElisionFilter that strips the given articles.
func NewElisionFilter(articles analysis.TokenMap) *ElisionFilter {
	f := new(ElisionFilter)
	f.articles = articles
	return f
}
// Filter removes an elided article from the front of each term.
//
// Each term is scanned rune by rune. At every Apostrophe or
// RightSingleQuotationMark the bytes in front of it are looked up in the
// article map; on the first hit the term is cut down to the bytes after
// that apostrophe. Tokens are modified in place and the input stream itself
// is returned.
func (s *ElisionFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		term := token.Term
		offset := 0
		for offset < len(term) {
			r, width := utf8.DecodeRune(term[offset:])
			next := offset + width
			if r != Apostrophe && r != RightSingleQuotationMark {
				offset = next
				continue
			}
			// is everything in front of the apostrophe a known article?
			if _, isArticle := s.articles[string(term[:offset])]; isArticle {
				token.Term = term[next:]
				break
			}
			offset = next
		}
	}
	return input
}
// ElisionFilterConstructor builds an ElisionFilter from a configuration map.
// "articles_token_map" (string) is required and names the token map holding
// the articles; it is resolved through cache.
func ElisionFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	mapName, found := config["articles_token_map"].(string)
	if !found {
		return nil, fmt.Errorf("must specify articles_token_map")
	}

	articles, err := cache.TokenMapNamed(mapName)
	if err != nil {
		return nil, fmt.Errorf("error building elision filter: %v", err)
	}

	var filter analysis.TokenFilter = NewElisionFilter(articles)
	return filter, nil
}
// init registers the elision filter constructor under Name and panics when
// the registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, ElisionFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/elision/elision_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package elision
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenmap"
"github.com/blevesearch/bleve/v2/registry"
)
// TestElisionFilter checks that the article "ar" is stripped from a term
// when it is followed by either apostrophe variant (Apostrophe or
// RightSingleQuotationMark). The filter is built through the registry cache
// from a token map and a filter configuration.
func TestElisionFilter(t *testing.T) {
tests := []struct {
input analysis.TokenStream
output analysis.TokenStream
}{
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ar" + string(Apostrophe) + "word"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("word"),
},
},
},
{
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ar" + string(RightSingleQuotationMark) + "word"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("word"),
},
},
},
}
cache := registry.NewCache()
articleListConfig := map[string]interface{}{
"type": tokenmap.Name,
"tokens": []interface{}{"ar"},
}
_, err := cache.DefineTokenMap("articles_test", articleListConfig)
if err != nil {
t.Fatal(err)
}
elisionConfig := map[string]interface{}{
"type": "elision",
"articles_token_map": "articles_test",
}
elisionFilter, err := cache.DefineTokenFilter("elision_test", elisionConfig)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := elisionFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output[0].Term, actual[0].Term)
}
}
}
================================================
FILE: analysis/token/hierarchy/hierarchy.go
================================================
package hierarchy
import (
"bytes"
"fmt"
"math"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the hierarchy filter is registered.
const Name = "hierarchy"

// HierarchyFilter turns a path-like sequence of parts into one token per
// hierarchy level, each holding the path from the root down to that level.
type HierarchyFilter struct {
	maxLevels  int    // stop once this many levels have been emitted
	delimiter  []byte // separator between parts
	splitInput bool   // split each input term on delimiter before building levels
}

// NewHierarchyFilter creates a HierarchyFilter with the given delimiter,
// level limit and input-splitting behaviour.
func NewHierarchyFilter(delimiter []byte, maxLevels int, splitInput bool) *HierarchyFilter {
	f := new(HierarchyFilter)
	f.delimiter = delimiter
	f.maxLevels = maxLevels
	f.splitInput = splitInput
	return f
}
// Filter emits one token per hierarchy level.
//
// The parts of the hierarchy are the input terms, each split on the
// delimiter first when splitInput is set. Parts accumulate across all input
// tokens, and every new part produces a token holding all parts so far
// joined by the delimiter. Processing stops as soon as maxLevels parts have
// been consumed; at least one token is produced for non-empty input.
func (s *HierarchyFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	// maxLevels is only a hint for the initial capacity. It must not be
	// passed to make unchecked: the constructor default (math.MaxInt64) and
	// negative values make the allocation panic.
	capHint := len(input)
	if s.maxLevels < capHint {
		capHint = s.maxLevels
	}
	if capHint < 0 {
		capHint = 0
	}
	rv := make(analysis.TokenStream, 0, capHint)

	var soFar [][]byte
	for _, token := range input {
		parts := [][]byte{token.Term}
		if s.splitInput {
			parts = bytes.Split(token.Term, s.delimiter)
		}
		for _, part := range parts {
			soFar, rv = s.buildToken(rv, soFar, part)
			if len(soFar) >= s.maxLevels {
				return rv
			}
		}
	}
	return rv
}
// buildToken adds part as the next hierarchy level. It appends part to soFar,
// joins all parts with the delimiter and appends a Shingle token for the
// joined term (Start 0, End the byte length of the term, Position 1) to
// tokenStream. The extended part list and token stream are returned.
func (s *HierarchyFilter) buildToken(tokenStream analysis.TokenStream, soFar [][]byte, part []byte) (
	[][]byte, analysis.TokenStream) {
	levels := append(soFar, part)
	path := bytes.Join(levels, s.delimiter)
	levelToken := &analysis.Token{
		Term:     path,
		Type:     analysis.Shingle,
		Position: 1,
		Start:    0,
		End:      len(path),
	}
	return levels, append(tokenStream, levelToken)
}
// HierarchyFilterConstructor builds a HierarchyFilter from a configuration
// map.
//
// "delimiter" (string) is required. "max" is read when it holds a float64
// and otherwise defaults to math.MaxInt64; "split_input" is read when it
// holds a bool and otherwise defaults to true.
func HierarchyFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	maxLevels := math.MaxInt64
	if v, ok := config["max"].(float64); ok {
		maxLevels = int(v)
	}

	split := true
	if v, ok := config["split_input"].(bool); ok {
		split = v
	}

	sep, ok := config["delimiter"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify delimiter")
	}
	return NewHierarchyFilter([]byte(sep), maxLevels, split), nil
}
// init registers the hierarchy filter constructor under Name and panics when
// the registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, HierarchyFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/hierarchy/hierarchy_test.go
================================================
package hierarchy
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestHierarchyFilter runs the hierarchy filter over a table of cases: a
// single delimited term versus already-split terms, with and without a level
// limit, and with input splitting switched on and off.
func TestHierarchyFilter(t *testing.T) {
tests := []struct {
name string
delimiter string
max int
splitInput bool
input analysis.TokenStream
output analysis.TokenStream
}{
{
name: "single token a/b/c, delimiter /",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a/b/c"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
Type: analysis.Shingle,
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b"),
Type: analysis.Shingle,
Start: 0,
End: 3,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b/c"),
Type: analysis.Shingle,
Start: 0,
End: 5,
Position: 1,
},
},
delimiter: "/",
max: 10,
splitInput: true,
},
{
name: "multiple tokens already split a b c, delimiter /",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("b"),
},
&analysis.Token{
Term: []byte("c"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
Type: analysis.Shingle,
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b"),
Type: analysis.Shingle,
Start: 0,
End: 3,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b/c"),
Type: analysis.Shingle,
Start: 0,
End: 5,
Position: 1,
},
},
delimiter: "/",
max: 10,
splitInput: true,
},
// the level limit cuts the output off after two levels
{
name: "single token a/b/c, delimiter /, limit 2",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a/b/c"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
Type: analysis.Shingle,
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b"),
Type: analysis.Shingle,
Start: 0,
End: 3,
Position: 1,
},
},
delimiter: "/",
max: 2,
splitInput: true,
},
{
name: "multiple tokens already split a b c, delimiter /, limit 2",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("b"),
},
&analysis.Token{
Term: []byte("c"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
Type: analysis.Shingle,
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b"),
Type: analysis.Shingle,
Start: 0,
End: 3,
Position: 1,
},
},
delimiter: "/",
max: 2,
splitInput: true,
},
// without input splitting a delimited term counts as a single level
{
name: "single token a/b/c, delimiter /, no split",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a/b/c"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a/b/c"),
Type: analysis.Shingle,
Start: 0,
End: 5,
Position: 1,
},
},
delimiter: "/",
max: 10,
splitInput: false,
},
{
name: "multiple tokens already split a b c, delimiter /, no split",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
},
&analysis.Token{
Term: []byte("b"),
},
&analysis.Token{
Term: []byte("c"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("a"),
Type: analysis.Shingle,
Start: 0,
End: 1,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b"),
Type: analysis.Shingle,
Start: 0,
End: 3,
Position: 1,
},
&analysis.Token{
Term: []byte("a/b/c"),
Type: analysis.Shingle,
Start: 0,
End: 5,
Position: 1,
},
},
delimiter: "/",
max: 10,
splitInput: false,
},
}
for _, test := range tests {
// copy the loop variable so the closure below sees this iteration's case
test := test
t.Run(test.name, func(t *testing.T) {
filter := NewHierarchyFilter([]byte(test.delimiter), test.max, test.splitInput)
actual := filter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
})
}
}
================================================
FILE: analysis/token/keyword/keyword.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package keyword
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the keyword marker filter is registered.
const Name = "keyword_marker"

// KeyWordMarkerFilter flags tokens whose term appears in a set of keywords.
type KeyWordMarkerFilter struct {
	keyWords analysis.TokenMap // terms to be flagged as keywords
}

// NewKeyWordMarkerFilter creates a KeyWordMarkerFilter for the given set of
// keywords.
func NewKeyWordMarkerFilter(keyWords analysis.TokenMap) *KeyWordMarkerFilter {
	f := new(KeyWordMarkerFilter)
	f.keyWords = keyWords
	return f
}
// Filter sets KeyWord on every token whose term is contained in the keyword
// map. Tokens are modified in place and the input stream itself is returned;
// a KeyWord flag that is already set is never cleared.
func (f *KeyWordMarkerFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		if _, listed := f.keyWords[string(input[i].Term)]; listed {
			input[i].KeyWord = true
		}
	}
	return input
}
// KeyWordMarkerFilterConstructor builds a KeyWordMarkerFilter from a
// configuration map. "keywords_token_map" (string) is required and names the
// token map holding the keywords; it is resolved through cache.
func KeyWordMarkerFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	mapName, found := config["keywords_token_map"].(string)
	if !found {
		return nil, fmt.Errorf("must specify keywords_token_map")
	}

	keywords, err := cache.TokenMapNamed(mapName)
	if err != nil {
		return nil, fmt.Errorf("error building keyword marker filter: %v", err)
	}

	var filter analysis.TokenFilter = NewKeyWordMarkerFilter(keywords)
	return filter, nil
}
// init registers the keyword marker filter constructor under Name and panics
// when the registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, KeyWordMarkerFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/keyword/keyword_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package keyword
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestKeyWordMarkerFilter checks that exactly the tokens listed in the
// keyword map ("walk" and "park") come out with KeyWord set.
//
// Mismatches are reported per token; reporting only the first token's flag
// hides which token actually differs.
func TestKeyWordMarkerFilter(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term: []byte("walk"),
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term: []byte("park"),
		},
	}

	expectedTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("a"),
		},
		&analysis.Token{
			Term:    []byte("walk"),
			KeyWord: true,
		},
		&analysis.Token{
			Term: []byte("in"),
		},
		&analysis.Token{
			Term: []byte("the"),
		},
		&analysis.Token{
			Term:    []byte("park"),
			KeyWord: true,
		},
	}

	keyWordsMap := analysis.NewTokenMap()
	keyWordsMap.AddToken("walk")
	keyWordsMap.AddToken("park")

	filter := NewKeyWordMarkerFilter(keyWordsMap)
	ouputTokenStream := filter.Filter(inputTokenStream)

	if len(ouputTokenStream) != len(expectedTokenStream) {
		t.Fatalf("expected %d tokens, got %d", len(expectedTokenStream), len(ouputTokenStream))
	}
	for i, expected := range expectedTokenStream {
		actual := ouputTokenStream[i]
		if !reflect.DeepEqual(actual, expected) {
			t.Errorf("token %d: expected %q (KeyWord=%t), got %q (KeyWord=%t)",
				i, expected.Term, expected.KeyWord, actual.Term, actual.KeyWord)
		}
	}
}
================================================
FILE: analysis/token/length/length.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package length
import (
"fmt"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the length filter is registered.
const Name = "length"

// LengthFilter drops tokens whose term is shorter than min or longer than
// max runes. A bound that is zero or negative is not enforced.
type LengthFilter struct {
	min int // minimum term length in runes
	max int // maximum term length in runes
}

// NewLengthFilter creates a LengthFilter with the given rune-count bounds.
func NewLengthFilter(min, max int) *LengthFilter {
	f := new(LengthFilter)
	f.min, f.max = min, max
	return f
}
// Filter returns a new stream holding the input tokens whose term length,
// counted in runes, lies within the configured bounds. A bound is only
// enforced when it is greater than zero.
func (f *LengthFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0, len(input))
	for _, token := range input {
		wordLen := utf8.RuneCount(token.Term)
		tooShort := f.min > 0 && wordLen < f.min
		tooLong := f.max > 0 && wordLen > f.max
		if !tooShort && !tooLong {
			rv = append(rv, token)
		}
	}
	return rv
}
// LengthFilterConstructor builds a LengthFilter from a configuration map.
// "min" and "max" are read when they hold float64 values and default to 0;
// a configuration in which both end up as 0 is rejected.
func LengthFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var minLen, maxLen int
	if v, ok := config["min"].(float64); ok {
		minLen = int(v)
	}
	if v, ok := config["max"].(float64); ok {
		maxLen = int(v)
	}
	if minLen == 0 && maxLen == 0 {
		return nil, fmt.Errorf("either min or max must be non-zero")
	}
	return NewLengthFilter(minLen, maxLen), nil
}
// init registers the length filter constructor under Name and panics when
// the registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, LengthFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/length/length_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package length
import (
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestLengthFilter checks that with bounds [3, 4] only the
// three-rune term survives.
func TestLengthFilter(t *testing.T) {
	var inputTokenStream analysis.TokenStream
	for _, term := range []string{"1", "two", "three"} {
		inputTokenStream = append(inputTokenStream, &analysis.Token{Term: []byte(term)})
	}

	got := NewLengthFilter(3, 4).Filter(inputTokenStream)
	if len(got) != 1 {
		t.Fatalf("expected 1 output token")
	}
	if term := string(got[0].Term); term != "two" {
		t.Errorf("expected term `two`, got `%s`", term)
	}
}
// TestLengthFilterNoMax checks that a negative max disables the upper
// bound, so only terms shorter than min are dropped.
func TestLengthFilterNoMax(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("1"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("three"),
		},
	}

	lengthFilter := NewLengthFilter(3, -1)
	ouputTokenStream := lengthFilter.Filter(inputTokenStream)
	if len(ouputTokenStream) != 2 {
		t.Fatalf("expected 2 output token")
	}
	if string(ouputTokenStream[0].Term) != "two" {
		t.Errorf("expected term `two`, got `%s`", ouputTokenStream[0].Term)
	}
	if string(ouputTokenStream[1].Term) != "three" {
		// report the token that was actually compared
		t.Errorf("expected term `three`, got `%s`", ouputTokenStream[1].Term)
	}
}
// TestLengthFilterNoMin checks that a negative min disables the lower
// bound, so only terms longer than max are dropped.
func TestLengthFilterNoMin(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		&analysis.Token{
			Term: []byte("1"),
		},
		&analysis.Token{
			Term: []byte("two"),
		},
		&analysis.Token{
			Term: []byte("three"),
		},
	}

	lengthFilter := NewLengthFilter(-1, 4)
	ouputTokenStream := lengthFilter.Filter(inputTokenStream)
	if len(ouputTokenStream) != 2 {
		t.Fatalf("expected 2 output token")
	}
	if string(ouputTokenStream[0].Term) != "1" {
		t.Errorf("expected term `1`, got `%s`", ouputTokenStream[0].Term)
	}
	if string(ouputTokenStream[1].Term) != "two" {
		// report the token that was actually compared
		t.Errorf("expected term `two`, got `%s`", ouputTokenStream[1].Term)
	}
}
================================================
FILE: analysis/token/lowercase/lowercase.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package lowercase implements a TokenFilter which converts
// tokens to lower case according to unicode rules.
package lowercase
import (
"bytes"
"unicode"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name used to register LowerCaseFilter in the bleve
// registry. Note that it is "to_lower", not the package name.
const Name = "to_lower"
// LowerCaseFilter is a stateless token filter that lower-cases the
// term of every token it is given.
type LowerCaseFilter struct{}
// NewLowerCaseFilter returns a new LowerCaseFilter.
func NewLowerCaseFilter() *LowerCaseFilter {
	return new(LowerCaseFilter)
}
// Filter lower-cases the term of each token in place and returns the
// same stream it was given.
func (f *LowerCaseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		tok.Term = toLowerDeferredCopy(tok.Term)
	}
	return input
}
// LowerCaseFilterConstructor is the registry constructor for the
// lower-case filter. Neither config nor cache is consulted.
func LowerCaseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewLowerCaseFilter()
	return filter, nil
}
// init registers the lower-case filter constructor under Name and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, LowerCaseFilterConstructor); err != nil {
		panic(err)
	}
}
// toLowerDeferredCopy lower-cases s much like bytes.ToLower(), but it
// reuses (overwrites) the original byte array when possible and returns
// a prefix of it. A 'Σ' that is the final rune of s is lowered to 'ς'
// rather than 'σ'.
//
// NOTE: the lower-case form of a rune can have a different utf-8
// encoded length than the original:
//   - when it is narrower, the remaining bytes are compacted toward the
//     front of s as the scan proceeds;
//   - when it is wider, the result no longer fits in place, so a new
//     byte array is allocated and the remainder of the input is handed
//     to bytes.ToLower().
func toLowerDeferredCopy(s []byte) []byte {
	// i is the read cursor, j the write cursor; j never exceeds i.
	j := 0
	for i := 0; i < len(s); {
		wid := 1
		r := rune(s[i])
		if r >= utf8.RuneSelf {
			r, wid = utf8.DecodeRune(s[i:])
		}

		l := unicode.ToLower(r)

		// If the rune is already lowercased, just move to the
		// next rune.
		if l == r {
			// An earlier rune may have shrunk when lowered, leaving the
			// write cursor behind the read cursor; the unchanged bytes
			// must then be moved down or they would be lost.
			if i != j {
				copy(s[j:], s[i:i+wid])
			}
			i += wid
			j += wid
			continue
		}

		// Handles the Unicode edge-case where the last
		// rune in a word on the greek Σ needs to be converted
		// differently.
		if l == 'σ' && i+2 == len(s) {
			l = 'ς'
		}

		lwid := utf8.RuneLen(l)
		if lwid > wid {
			// utf-8 encoded replacement is wider
			// for now, punt and defer
			// to bytes.ToLower() for the remainder
			// only known to happen with chars
			//   Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
			//   Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
			rest := bytes.ToLower(s[i:])
			rv := make([]byte, j+len(rest))
			copy(rv[:j], s[:j])
			copy(rv[j:], rest)
			return rv
		}

		// The replacement fits: j+lwid <= i+wid, so this never
		// overwrites input that has not been read yet.
		utf8.EncodeRune(s[j:], l)
		i += wid
		j += lwid
	}
	return s[:j]
}
================================================
FILE: analysis/token/lowercase/lowercase_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package lowercase
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestLowerCaseFilter runs a set of terms through the filter and
// compares the whole resulting stream with the expected lower-cased
// terms.
func TestLowerCaseFilter(t *testing.T) {
	cases := []struct {
		in   string
		want string
	}{
		{"ONE", "one"},
		{"two", "two"},
		{"ThReE", "three"},
		{"steven's", "steven's"},
		// these characters are chosen in particular
		// because the utf-8 encoding of the lower-case
		// version has a different length
		// Rune İ(304) width 2 - Lower i(105) width 1
		// Rune Ⱥ(570) width 2 - Lower ⱥ(11365) width 3
		// Rune Ⱦ(574) width 2 - Lower ⱦ(11366) width 3
		{"İȺȾCAT", "iⱥⱦcat"},
		{"ȺȾCAT", "ⱥⱦcat"},
		{"ὈΔΥΣΣ", "ὀδυσς"},
	}

	inputTokenStream := make(analysis.TokenStream, 0, len(cases))
	expectedTokenStream := make(analysis.TokenStream, 0, len(cases))
	for _, c := range cases {
		inputTokenStream = append(inputTokenStream, &analysis.Token{Term: []byte(c.in)})
		expectedTokenStream = append(expectedTokenStream, &analysis.Token{Term: []byte(c.want)})
	}

	actual := NewLowerCaseFilter().Filter(inputTokenStream)
	if reflect.DeepEqual(actual, expectedTokenStream) {
		return
	}
	t.Errorf("expected %#v got %#v", expectedTokenStream, actual)
	t.Errorf("expected %s got %s", expectedTokenStream[0].Term, actual[0].Term)
}
// BenchmarkLowerCaseFilter repeatedly filters a fixed stream of mostly
// lower-case ASCII terms plus two terms whose case mapping changes the
// encoded width.
func BenchmarkLowerCaseFilter(b *testing.B) {
	terms := []string{
		"A", "boiling", "liquid", "expanding", "vapor", "explosion",
		"caused", "by", "the", "rupture", "of", "a", "vessel",
		"containing", "a", "pressurized", "liquid", "above", "its",
		"boiling", "point",
		"İȺȾCAT",
		"ȺȾCAT",
	}
	input := make(analysis.TokenStream, 0, len(terms))
	for _, term := range terms {
		input = append(input, &analysis.Token{Term: []byte(term)})
	}

	filter := NewLowerCaseFilter()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		filter.Filter(input)
	}
}
================================================
FILE: analysis/token/ngram/ngram.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ngram
import (
"bytes"
"fmt"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the ngram token filter is registered
// with the bleve registry.
const Name = "ngram"
// NgramFilter is a token filter that replaces each token with the
// n-grams of its term, for every n from minLength through maxLength
// (both measured in runes).
type NgramFilter struct {
	minLength, maxLength int
}
// NewNgramFilter returns an NgramFilter producing n-grams whose size
// ranges from minLength to maxLength.
func NewNgramFilter(minLength, maxLength int) *NgramFilter {
	f := new(NgramFilter)
	f.minLength = minLength
	f.maxLength = maxLength
	return f
}
// Filter returns a new stream containing, for every input token and
// every starting rune of its term, the n-grams of each configured size
// that fit inside the term. Each n-gram token copies Position, Start,
// End and Type from the token it was cut from.
func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	out := make(analysis.TokenStream, 0, len(input))
	for _, src := range input {
		total := utf8.RuneCount(src.Term)
		runes := bytes.Runes(src.Term)
		// begin is the index of the first rune of the n-gram
		for begin := 0; begin < total; begin++ {
			for size := s.minLength; size <= s.maxLength; size++ {
				stop := begin + size
				if stop > total {
					// an n-gram of this size does not fit here
					continue
				}
				out = append(out, &analysis.Token{
					Position: src.Position,
					Start:    src.Start,
					End:      src.End,
					Type:     src.Type,
					Term:     analysis.BuildTermFromRunes(runes[begin:stop]),
				})
			}
		}
	}
	return out
}
// NgramFilterConstructor builds an NgramFilter from a config map.
// Both "min" and "max" are required and must be convertible by
// convertToInt; "min" is validated before "max".
func NgramFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	rawMin, found := config["min"]
	if !found {
		return nil, fmt.Errorf("must specify min")
	}
	lo, err := convertToInt(rawMin)
	if err != nil {
		return nil, err
	}

	rawMax, found := config["max"]
	if !found {
		return nil, fmt.Errorf("must specify max")
	}
	hi, err := convertToInt(rawMax)
	if err != nil {
		return nil, err
	}

	return NewNgramFilter(lo, hi), nil
}
// init registers the ngram filter constructor under Name and panics
// if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, NgramFilterConstructor); err != nil {
		panic(err)
	}
}
// convertToInt accepts either an int or a float64 value and returns it
// as an int (a float64 is truncated by the int conversion). Any other
// dynamic type yields an error.
func convertToInt(val interface{}) (int, error) {
	switch v := val.(type) {
	case int:
		return v, nil
	case float64:
		return int(v), nil
	default:
		return 0, fmt.Errorf("failed to convert to int value")
	}
}
================================================
FILE: analysis/token/ngram/ngram_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ngram
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestNgramFilter checks the n-grams produced from "abcde" for
// unigram-only, bigram-only and 1..3 size ranges.
func TestNgramFilter(t *testing.T) {
	// stream builds a token stream whose tokens carry only a term
	stream := func(terms ...string) analysis.TokenStream {
		ts := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			ts = append(ts, &analysis.Token{Term: []byte(term)})
		}
		return ts
	}

	tests := []struct {
		min    int
		max    int
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			min:    1,
			max:    1,
			input:  stream("abcde"),
			output: stream("a", "b", "c", "d", "e"),
		},
		{
			min:    2,
			max:    2,
			input:  stream("abcde"),
			output: stream("ab", "bc", "cd", "de"),
		},
		{
			min:   1,
			max:   3,
			input: stream("abcde"),
			output: stream(
				"a", "ab", "abc",
				"b", "bc", "bcd",
				"c", "cd", "cde",
				"d", "de",
				"e",
			),
		},
	}

	for _, test := range tests {
		actual := NewNgramFilter(test.min, test.max).Filter(test.input)
		if reflect.DeepEqual(actual, test.output) {
			continue
		}
		t.Errorf("expected %s, got %s", test.output, actual)
	}
}
// TestConversionInt checks that the constructor accepts int-typed
// "min" and "max" config values and stores both bounds.
func TestConversionInt(t *testing.T) {
	config := map[string]interface{}{
		"type": Name,
		"min":  3,
		"max":  8,
	}

	f, err := NgramFilterConstructor(config, nil)
	if err != nil {
		// f is nil here, so the test cannot continue
		t.Fatalf("Failed to construct the ngram filter: %v", err)
	}

	ngram, ok := f.(*NgramFilter)
	if !ok {
		t.Fatalf("expected *NgramFilter, got %T", f)
	}
	// fail when either bound is wrong, not only when both are
	if ngram.minLength != 3 || ngram.maxLength != 8 {
		t.Errorf("Failed to construct the bounds. Got %v and %v.", ngram.minLength, ngram.maxLength)
	}
}
// TestConversionFloat checks that the constructor accepts
// float64-typed "min" and "max" config values and stores both bounds.
func TestConversionFloat(t *testing.T) {
	config := map[string]interface{}{
		"type": Name,
		"min":  float64(3),
		"max":  float64(8),
	}

	f, err := NgramFilterConstructor(config, nil)
	if err != nil {
		// f is nil here, so the test cannot continue
		t.Fatalf("Failed to construct the ngram filter: %v", err)
	}

	ngram, ok := f.(*NgramFilter)
	if !ok {
		t.Fatalf("expected *NgramFilter, got %T", f)
	}
	// fail when either bound is wrong, not only when both are
	if ngram.minLength != 3 || ngram.maxLength != 8 {
		t.Errorf("Failed to construct the bounds. Got %v and %v.", ngram.minLength, ngram.maxLength)
	}
}
// TestBadConversion checks that a string-typed "min" value is rejected
// with the conversion error.
func TestBadConversion(t *testing.T) {
	config := map[string]interface{}{
		"type": Name,
		"min":  "3",
	}

	_, err := NgramFilterConstructor(config, nil)
	if err == nil {
		// stop here: calling err.Error() below would dereference nil
		t.Fatalf("Expected conversion error.")
	}

	if err.Error() != "failed to convert to int value" {
		t.Errorf("Wrong error received. Got %v.", err)
	}
}
================================================
FILE: analysis/token/porter/porter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package porter
import (
"bytes"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/go-porterstemmer"
)
// Name is the name under which the porter stemmer token filter is
// registered with the bleve registry.
const Name = "stemmer_porter"
// PorterStemmer is a stateless token filter that stems token terms
// using the go-porterstemmer package.
type PorterStemmer struct{}
// NewPorterStemmer returns a new PorterStemmer.
func NewPorterStemmer() *PorterStemmer {
	return new(PorterStemmer)
}
// Filter stems the term of every token that is not flagged as a
// keyword, modifying the tokens in place, and returns the same stream.
// Terms are passed to the stemmer as-is, without lower-casing.
func (s *PorterStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, tok := range input {
		if tok.KeyWord {
			// protected keyword: leave the term untouched
			continue
		}
		stemmed := porterstemmer.StemWithoutLowerCasing(bytes.Runes(tok.Term))
		tok.Term = analysis.BuildTermFromRunes(stemmed)
	}
	return input
}
// PorterStemmerConstructor is the registry constructor for the porter
// stemmer filter. Neither config nor cache is consulted.
func PorterStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewPorterStemmer()
	return filter, nil
}
// init registers the porter stemmer constructor under Name and panics
// if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, PorterStemmerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/porter/porter_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package porter
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestPorterStemmer stems a handful of terms and verifies that tokens
// flagged as keywords are left untouched. Each mismatching token is
// reported with its index.
func TestPorterStemmer(t *testing.T) {
	cases := []struct {
		in      string
		want    string
		keyword bool
	}{
		{in: "walking", want: "walk"},
		{in: "talked", want: "talk"},
		{in: "business", want: "busi"},
		{in: "protected", want: "protected", keyword: true},
		{in: "cat", want: "cat"},
		{in: "done", want: "done"},
		// a term which does stem, but does not change length
		{in: "marty", want: "marti"},
	}

	inputTokenStream := make(analysis.TokenStream, 0, len(cases))
	expectedTokenStream := make(analysis.TokenStream, 0, len(cases))
	for _, c := range cases {
		inputTokenStream = append(inputTokenStream, &analysis.Token{
			Term:    []byte(c.in),
			KeyWord: c.keyword,
		})
		expectedTokenStream = append(expectedTokenStream, &analysis.Token{
			Term:    []byte(c.want),
			KeyWord: c.keyword,
		})
	}

	filter := NewPorterStemmer()
	ouputTokenStream := filter.Filter(inputTokenStream)
	if len(ouputTokenStream) != len(expectedTokenStream) {
		t.Fatalf("expected %d tokens, got %d", len(expectedTokenStream), len(ouputTokenStream))
	}
	for i := range expectedTokenStream {
		if !reflect.DeepEqual(ouputTokenStream[i], expectedTokenStream[i]) {
			t.Errorf("token %d: expected %#v got %#v", i, expectedTokenStream[i], ouputTokenStream[i])
		}
	}
}
// BenchmarkPorterStemmer repeatedly runs the stemmer over a small
// fixed stream that includes one protected keyword.
func BenchmarkPorterStemmer(b *testing.B) {
	inputTokenStream := analysis.TokenStream{
		{Term: []byte("walking")},
		{Term: []byte("talked")},
		{Term: []byte("business")},
		{Term: []byte("protected"), KeyWord: true},
		{Term: []byte("cat")},
		{Term: []byte("done")},
	}

	filter := NewPorterStemmer()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		filter.Filter(inputTokenStream)
	}
}
================================================
FILE: analysis/token/reverse/reverse.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package reverse
import (
"unicode"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name used to register ReverseFilter in the bleve
// registry.
const Name = "reverse"
// ReverseFilter is a stateless token filter that reverses the term of
// every token it is given.
type ReverseFilter struct{}
// NewReverseFilter returns a new ReverseFilter.
func NewReverseFilter() *ReverseFilter {
	return new(ReverseFilter)
}
// Filter replaces the term of each token with its reversed form and
// returns the same stream it was given.
func (f *ReverseFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		tok.Term = reverse(tok.Term)
	}
	return input
}
// ReverseFilterConstructor is the registry constructor for the reverse
// filter. Neither config nor cache is consulted.
func ReverseFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	var filter analysis.TokenFilter = NewReverseFilter()
	return filter, nil
}
// init registers the reverse filter constructor under Name and panics
// if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, ReverseFilterConstructor); err != nil {
		panic(err)
	}
}
// reverse(..) returns a newly allocated, reversed copy of the provided
// utf-8 encoded byte array; the input is not modified.
//
// The unit of reversal is a base rune together with any mark runes
// (unicode categories Mn, Me, Mc) that directly follow it, so that
// combining marks stay attached to the rune they modify.
//
// Widths are taken from the bytes as actually encoded in s, so an
// invalid utf-8 byte is carried over verbatim as a one-byte unit.
func reverse(s []byte) []byte {
	output := make([]byte, len(s))
	cursorOut := len(s)
	for cursorIn := 0; cursorIn < len(s); {
		// width of the base rune
		_, wid := utf8.DecodeRune(s[cursorIn:])

		// extend the unit over any marks that follow the base rune
		for cursorIn+wid < len(s) {
			r, markWid := utf8.DecodeRune(s[cursorIn+wid:])
			if !unicode.IsMark(r) {
				break
			}
			wid += markWid
		}

		// units are written back to front, bytes within a unit keep
		// their order
		copy(output[cursorOut-wid:cursorOut], s[cursorIn:cursorIn+wid])
		cursorIn += wid
		cursorOut -= wid
	}
	return output
}
================================================
FILE: analysis/token/reverse/reverse_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package reverse
import (
"bytes"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestReverseFilter reverses a mix of empty, ASCII, symbol and
// accented terms and compares each output term with its expectation.
func TestReverseFilter(t *testing.T) {
	pairs := []struct {
		in   string
		want string
	}{
		{"one", "eno"},
		{"TWo", "oWT"},
		{"thRee", "eeRht"},
		{"four's", "s'ruof"},
		{"what's this in reverse", "esrever ni siht s'tahw"},
		{"œ∑´®†", "†®´∑œ"},
		{"İȺȾCAT÷≥≤µ123", "321µ≤≥÷TACȾȺİ"},
		{"!@#$%^&*()", ")(*&^%$#@!"},
		{"cafés", "séfac"},
		{"¿Dónde estás?", "?sátse ednóD¿"},
		{"Me gustaría una cerveza.", ".azevrec anu aíratsug eM"},
	}

	// both streams start with a token that has no term at all
	inputTokenStream := analysis.TokenStream{&analysis.Token{}}
	expectedTokenStream := analysis.TokenStream{&analysis.Token{}}
	for _, p := range pairs {
		inputTokenStream = append(inputTokenStream, &analysis.Token{Term: []byte(p.in)})
		expectedTokenStream = append(expectedTokenStream, &analysis.Token{Term: []byte(p.want)})
	}

	outputTokenStream := NewReverseFilter().Filter(inputTokenStream)
	for i, want := range expectedTokenStream {
		got := outputTokenStream[i]
		if bytes.Equal(got.Term, want.Term) {
			continue
		}
		t.Errorf("[%d] expected %s got %s",
			i+1, want.Term, got.Term)
	}
}
// BenchmarkReverseFilter repeatedly reverses a fixed stream of mostly
// ASCII terms plus two terms containing multi-byte runes.
func BenchmarkReverseFilter(b *testing.B) {
	terms := []string{
		"A", "boiling", "liquid", "expanding", "vapor", "explosion",
		"caused", "by", "the", "rupture", "of", "a", "vessel",
		"containing", "pressurized", "liquid", "above", "its",
		"boiling", "point",
		"İȺȾCAT",
		"Me gustaría una cerveza.",
	}
	input := make(analysis.TokenStream, 0, len(terms))
	for _, term := range terms {
		input = append(input, &analysis.Token{Term: []byte(term)})
	}

	filter := NewReverseFilter()
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		filter.Filter(input)
	}
}
================================================
FILE: analysis/token/shingle/shingle.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package shingle
import (
"container/ring"
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the shingle token filter is registered
// with the bleve registry.
const Name = "shingle"
// ShingleFilter is a token filter that joins runs of consecutive
// tokens into shingle tokens.
type ShingleFilter struct {
	// min and max bound the number of tokens joined into one shingle;
	// max is also the size of the ring of recent tokens
	min, max int
	// outputOriginal makes Filter emit the input tokens as well
	outputOriginal bool
	// tokenSeparator is placed between the terms joined into a shingle
	tokenSeparator string
	// fill is the term used for filler tokens inserted at position gaps
	fill string
}
// NewShingleFilter returns a ShingleFilter producing shingles of min
// to max tokens, joined with sep. outputOriginal controls whether the
// input tokens are emitted too, and fill is the term used for filler
// tokens.
func NewShingleFilter(min, max int, outputOriginal bool, sep, fill string) *ShingleFilter {
	f := new(ShingleFilter)
	f.min, f.max = min, max
	f.outputOriginal = outputOriginal
	f.tokenSeparator = sep
	f.fill = fill
	return f
}
// Filter walks the input while keeping the most recent s.max tokens in
// a ring. After each token (or filler) is placed in the ring, every
// shingle that ends on it is appended to the result. When a token's
// Position is more than one past the previous token's, filler tokens
// carrying the fill term are fed through the ring first. If
// outputOriginal is set, each input token is emitted before anything
// derived from it.
func (s *ShingleFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	out := make(analysis.TokenStream, 0, len(input))
	window := ring.New(s.max)
	filled := 0
	lastPosition := 0

	// push stores tok in the current ring slot, emits the shingles
	// ending on it and advances the ring
	push := func(tok *analysis.Token) {
		window.Value = tok
		if filled < s.max {
			filled++
		}
		out = append(out, s.shingleCurrentRingState(window, filled)...)
		window = window.Next()
	}

	for _, tok := range input {
		if s.outputOriginal {
			out = append(out, tok)
		}

		// if there are gaps, insert filler tokens
		for gap := tok.Position - lastPosition; gap > 1; gap-- {
			push(&analysis.Token{
				Position: 0,
				Start:    -1,
				End:      -1,
				Type:     analysis.AlphaNumeric,
				Term:     []byte(s.fill),
			})
		}
		lastPosition = tok.Position

		push(tok)
	}

	return out
}
// shingleCurrentRingState builds one shingle for every size from s.min
// to s.max that the ring currently holds enough items for. Each shingle
// ends on the ring's current element and joins the terms of its members
// with the token separator.
//
// The shingle's Position is the first non-zero member Position, its
// Start the first member Start other than -1, and its End the last
// member End other than -1; each stays zero when no member supplies a
// value.
func (s *ShingleFilter) shingleCurrentRingState(ring *ring.Ring, itemsInRing int) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0)
	for size := s.min; size <= s.max; size++ {
		if itemsInRing < size {
			// not enough items in the ring for a shingle of this size
			continue
		}

		term := make([]byte, 0)
		pos, start, end := 0, -1, 0

		// step back to the first member of this shingle
		cursor := ring.Move(1 - size)
		for i := 0; i < size; i, cursor = i+1, cursor.Next() {
			if i > 0 {
				term = append(term, s.tokenSeparator...)
			}
			member := cursor.Value.(*analysis.Token)
			if pos == 0 && member.Position != 0 {
				pos = member.Position
			}
			if start == -1 && member.Start != -1 {
				start = member.Start
			}
			if member.End != -1 {
				end = member.End
			}
			term = append(term, member.Term...)
		}

		// pos is zero and end is never -1 when unset, so both can be
		// stored directly; start needs the explicit check
		shingle := &analysis.Token{
			Type:     analysis.Shingle,
			Term:     term,
			Position: pos,
			End:      end,
		}
		if start != -1 {
			shingle.Start = start
		}
		rv = append(rv, shingle)
	}
	return rv
}
// ShingleFilterConstructor builds a ShingleFilter from a config map.
// "min" and "max" are required float64 values (truncated to int).
// Optional entries: "output_original" (bool, default false),
// "separator" (string, default " ") and "filler" (string, default "_").
// An optional entry of the wrong type falls back to its default.
func ShingleFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	minVal, ok := config["min"].(float64)
	if !ok {
		return nil, fmt.Errorf("must specify min")
	}
	maxVal, ok := config["max"].(float64)
	if !ok {
		return nil, fmt.Errorf("must specify max")
	}

	outputOriginal := false
	if v, ok := config["output_original"].(bool); ok {
		outputOriginal = v
	}
	sep := " "
	if v, ok := config["separator"].(string); ok {
		sep = v
	}
	fill := "_"
	if v, ok := config["filler"].(string); ok {
		fill = v
	}

	return NewShingleFilter(int(minVal), int(maxVal), outputOriginal, sep, fill), nil
}
// init registers the shingle filter constructor under Name and panics
// if the registration is rejected.
func init() {
	if err := registry.RegisterTokenFilter(Name, ShingleFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/shingle/shingle_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package shingle
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
func TestShingleFilter(t *testing.T) {
tests := []struct {
min int
max int
outputOriginal bool
separator string
filler string
input analysis.TokenStream
output analysis.TokenStream
}{
{
min: 2,
max: 2,
outputOriginal: false,
separator: " ",
filler: "_",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("the"),
},
&analysis.Token{
Term: []byte("quick"),
},
&analysis.Token{
Term: []byte("brown"),
},
&analysis.Token{
Term: []byte("fox"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("the quick"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("quick brown"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("brown fox"),
Type: analysis.Shingle,
},
},
},
{
min: 3,
max: 3,
outputOriginal: false,
separator: " ",
filler: "_",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("the"),
},
&analysis.Token{
Term: []byte("quick"),
},
&analysis.Token{
Term: []byte("brown"),
},
&analysis.Token{
Term: []byte("fox"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("the quick brown"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("quick brown fox"),
Type: analysis.Shingle,
},
},
},
{
min: 2,
max: 3,
outputOriginal: false,
separator: " ",
filler: "_",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("the"),
},
&analysis.Token{
Term: []byte("quick"),
},
&analysis.Token{
Term: []byte("brown"),
},
&analysis.Token{
Term: []byte("fox"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("the quick"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("quick brown"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("the quick brown"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("brown fox"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("quick brown fox"),
Type: analysis.Shingle,
},
},
},
{
min: 3,
max: 3,
outputOriginal: false,
separator: " ",
filler: "_",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("ugly"),
Position: 1,
},
&analysis.Token{
Term: []byte("quick"),
Position: 3,
},
&analysis.Token{
Term: []byte("brown"),
Position: 4,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("ugly _ quick"),
Type: analysis.Shingle,
Position: 1,
},
&analysis.Token{
Term: []byte("_ quick brown"),
Type: analysis.Shingle,
Position: 3,
},
},
},
{
min: 1,
max: 5,
outputOriginal: false,
separator: " ",
filler: "_",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Position: 1,
},
&analysis.Token{
Term: []byte("text"),
Position: 2,
},
// token 3 removed by stop filter
&analysis.Token{
Term: []byte("see"),
Position: 4,
},
&analysis.Token{
Term: []byte("shingles"),
Position: 5,
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Type: analysis.Shingle,
Position: 1,
},
&analysis.Token{
Term: []byte("text"),
Type: analysis.Shingle,
Position: 2,
},
&analysis.Token{
Term: []byte("test text"),
Type: analysis.Shingle,
Position: 1,
},
&analysis.Token{
Term: []byte("_"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("text _"),
Type: analysis.Shingle,
Position: 2,
},
&analysis.Token{
Term: []byte("test text _"),
Type: analysis.Shingle,
Position: 1,
},
&analysis.Token{
Term: []byte("see"),
Type: analysis.Shingle,
Position: 4,
},
&analysis.Token{
Term: []byte("_ see"),
Type: analysis.Shingle,
Position: 4,
},
&analysis.Token{
Term: []byte("text _ see"),
Type: analysis.Shingle,
Position: 2,
},
&analysis.Token{
Term: []byte("test text _ see"),
Type: analysis.Shingle,
Position: 1,
},
&analysis.Token{
Term: []byte("shingles"),
Type: analysis.Shingle,
Position: 5,
},
&analysis.Token{
Term: []byte("see shingles"),
Type: analysis.Shingle,
Position: 4,
},
&analysis.Token{
Term: []byte("_ see shingles"),
Type: analysis.Shingle,
Position: 4,
},
&analysis.Token{
Term: []byte("text _ see shingles"),
Type: analysis.Shingle,
Position: 2,
},
&analysis.Token{
Term: []byte("test text _ see shingles"),
Type: analysis.Shingle,
Position: 1,
},
},
},
{
min: 2,
max: 2,
outputOriginal: true,
separator: " ",
filler: "_",
input: analysis.TokenStream{
&analysis.Token{
Term: []byte("the"),
},
&analysis.Token{
Term: []byte("quick"),
},
&analysis.Token{
Term: []byte("brown"),
},
&analysis.Token{
Term: []byte("fox"),
},
},
output: analysis.TokenStream{
&analysis.Token{
Term: []byte("the"),
},
&analysis.Token{
Term: []byte("quick"),
},
&analysis.Token{
Term: []byte("the quick"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("brown"),
},
&analysis.Token{
Term: []byte("quick brown"),
Type: analysis.Shingle,
},
&analysis.Token{
Term: []byte("fox"),
},
&analysis.Token{
Term: []byte("brown fox"),
Type: analysis.Shingle,
},
},
},
}
for _, test := range tests {
shingleFilter := NewShingleFilter(test.min, test.max, test.outputOriginal, test.separator, test.filler)
actual := shingleFilter.Filter(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("expected %s, got %s", test.output, actual)
}
}
}
// TestShingleFilterBug431 verifies that the shingle filter is stateless: one
// filter instance is run over two unrelated token streams in turn, and the
// second result must not be contaminated by anything left over from the first.
func TestShingleFilterBug431(t *testing.T) {
	// plain builds a stream of tokens that carry nothing but a term.
	plain := func(terms ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			stream = append(stream, &analysis.Token{Term: []byte(term)})
		}
		return stream
	}
	// shingles builds a stream of tokens typed as analysis.Shingle.
	shingles := func(terms ...string) analysis.TokenStream {
		stream := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			stream = append(stream, &analysis.Token{
				Term: []byte(term),
				Type: analysis.Shingle,
			})
		}
		return stream
	}

	cases := []struct {
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			input:  plain("the", "quick", "brown", "fox"),
			output: shingles("the quick", "quick brown", "brown fox"),
		},
		{
			input:  plain("a", "sad", "dirty", "sock"),
			output: shingles("a sad", "sad dirty", "dirty sock"),
		},
	}

	// The filter is deliberately created once and shared by every case.
	shared := NewShingleFilter(2, 2, false, " ", "_")
	for _, tc := range cases {
		got := shared.Filter(tc.input)
		if !reflect.DeepEqual(got, tc.output) {
			t.Errorf("expected %s, got %s", tc.output, got)
		}
	}
}
================================================
FILE: analysis/token/snowball/snowball.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package snowball
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/snowball"
)
// Name is the name under which the snowball stemmer token filter is
// registered with the registry.
const Name = "stemmer_snowball"
// SnowballStemmer is a token filter that stems token terms with the snowball
// stemmer for one configured language.
type SnowballStemmer struct {
// language is handed to snowball.Stem for every token that gets stemmed.
language string
}
// NewSnowballStemmer returns a SnowballStemmer that stems using the given
// language.
func NewSnowballStemmer(language string) *SnowballStemmer {
	stemmer := new(SnowballStemmer)
	stemmer.language = language
	return stemmer
}
// Filter stems the term of every token in input in place and returns the same
// stream. Tokens flagged as KeyWord are protected and are left untouched.
//
// The error returned by snowball.Stem is honoured: when stemming fails the
// token keeps its original term instead of being overwritten with whatever
// value accompanied the error.
func (s *SnowballStemmer) Filter(input analysis.TokenStream) analysis.TokenStream {
	for _, token := range input {
		// protected keywords are never stemmed
		if token.KeyWord {
			continue
		}
		stemmed, err := snowball.Stem(string(token.Term), s.language, true)
		if err != nil {
			// Filter has no error return, so the best we can do is to
			// leave the term as it was rather than clobber it.
			continue
		}
		token.Term = []byte(stemmed)
	}
	return input
}
// SnowballStemmerConstructor builds a SnowballStemmer from config. The
// "language" entry must be present and hold a string; otherwise an error is
// returned. The cache argument is not used.
func SnowballStemmerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	if language, isString := config["language"].(string); isString {
		return NewSnowballStemmer(language), nil
	}
	return nil, fmt.Errorf("must specify language")
}
// init registers the snowball stemmer constructor under Name and panics if
// registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, SnowballStemmerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/snowball/snowball_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package snowball
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestSnowballStemmer runs the English stemmer over a small token stream and
// checks the stemmed terms, including that a token flagged as KeyWord is left
// unchanged.
func TestSnowballStemmer(t *testing.T) {
	inputTokenStream := analysis.TokenStream{
		{Term: []byte("walking")},
		{Term: []byte("talked")},
		{Term: []byte("business")},
		{Term: []byte("protected"), KeyWord: true},
		{Term: []byte("cat")},
		{Term: []byte("done")},
		// a term which does stem, but does not change length
		{Term: []byte("marty")},
	}

	expectedTokenStream := analysis.TokenStream{
		{Term: []byte("walk")},
		{Term: []byte("talk")},
		{Term: []byte("busi")},
		{Term: []byte("protected"), KeyWord: true},
		{Term: []byte("cat")},
		{Term: []byte("done")},
		{Term: []byte("marti")},
	}

	filter := NewSnowballStemmer("english")
	outputTokenStream := filter.Filter(inputTokenStream)
	if !reflect.DeepEqual(outputTokenStream, expectedTokenStream) {
		// Report both streams in full: any element may be the one that
		// differs, not just the protected keyword at index 3.
		t.Errorf("expected %s, got %s", expectedTokenStream, outputTokenStream)
	}
}
// BenchmarkSnowballStemmer times the English stemmer over a fixed six-token
// stream. The same stream is passed to Filter on every iteration.
func BenchmarkSnowballStemmer(b *testing.B) {
	stream := analysis.TokenStream{
		{Term: []byte("walking")},
		{Term: []byte("talked")},
		{Term: []byte("business")},
		{Term: []byte("protected"), KeyWord: true},
		{Term: []byte("cat")},
		{Term: []byte("done")},
	}

	stemmer := NewSnowballStemmer("english")

	// exclude the setup above from the measurement
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		stemmer.Filter(stream)
	}
}
================================================
FILE: analysis/token/stop/stop.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package stop implements a TokenFilter removing tokens found in
// a TokenMap.
//
// Its constructor takes the following arguments:
//
// "stop_token_map" (string): the name of the token map identifying tokens to
// remove.
package stop
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the stop tokens filter is registered with the
// registry.
const Name = "stop_tokens"
// StopTokensFilter is a token filter that drops every token whose term is a
// key of its token map.
type StopTokensFilter struct {
// stopTokens holds the terms to remove; Filter looks each term up in it.
stopTokens analysis.TokenMap
}
// NewStopTokensFilter returns a StopTokensFilter that removes the terms found
// in stopTokens.
func NewStopTokensFilter(stopTokens analysis.TokenMap) *StopTokensFilter {
	filter := new(StopTokensFilter)
	filter.stopTokens = stopTokens
	return filter
}
// Filter removes every token whose term is present in the stop token map.
// The surviving tokens are compacted to the front of input's backing array,
// preserving their order, and the shortened stream is returned.
func (f *StopTokensFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	// kept shares input's backing array; it never grows past the read index.
	kept := input[:0]
	for _, token := range input {
		if _, stopped := f.stopTokens[string(token.Term)]; stopped {
			continue
		}
		kept = append(kept, token)
	}
	return kept
}
// StopTokensFilterConstructor builds a StopTokensFilter from config. The
// "stop_token_map" entry must be a string naming a token map, which is then
// resolved through cache. An error is returned when the entry is missing or
// the token map cannot be obtained.
func StopTokensFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	mapName, isString := config["stop_token_map"].(string)
	if !isString {
		return nil, fmt.Errorf("must specify stop_token_map")
	}

	stopTokens, lookupErr := cache.TokenMapNamed(mapName)
	if lookupErr != nil {
		return nil, fmt.Errorf("error building stop words filter: %v", lookupErr)
	}

	return NewStopTokensFilter(stopTokens), nil
}
// init registers the stop tokens filter constructor under Name and panics if
// registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, StopTokensFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/stop/stop_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package stop
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenmap"
"github.com/blevesearch/bleve/v2/registry"
)
// TestStopWordsFilter defines a token map and a stop filter through the
// registry cache and checks that the stop words are removed from the stream.
func TestStopWordsFilter(t *testing.T) {
	// stream builds a token stream whose tokens carry only a term.
	stream := func(terms ...string) analysis.TokenStream {
		tokens := make(analysis.TokenStream, 0, len(terms))
		for _, term := range terms {
			tokens = append(tokens, &analysis.Token{Term: []byte(term)})
		}
		return tokens
	}

	inputTokenStream := stream("a", "walk", "in", "the", "park")
	expectedTokenStream := stream("walk", "park")

	cache := registry.NewCache()
	if _, err := cache.DefineTokenMap("stop_test", map[string]interface{}{
		"type":   tokenmap.Name,
		"tokens": []interface{}{"a", "in", "the"},
	}); err != nil {
		t.Fatal(err)
	}

	stopFilter, err := cache.DefineTokenFilter("stop_test", map[string]interface{}{
		"type":           "stop_tokens",
		"stop_token_map": "stop_test",
	})
	if err != nil {
		t.Fatal(err)
	}

	got := stopFilter.Filter(inputTokenStream)
	if !reflect.DeepEqual(got, expectedTokenStream) {
		t.Errorf("expected %#v got %#v", expectedTokenStream, got)
	}
}
// BenchmarkStopWordsFilter times the stop filter over a five-token stream of
// which three tokens are stop words.
//
// The filter compacts the stream it is given in place, so passing the same
// slice to every iteration would leave it without any stop words after a few
// rounds. Each iteration therefore restores a scratch slice from the pristine
// input first, so that every round filters the same data.
func BenchmarkStopWordsFilter(b *testing.B) {
	inputTokenStream := analysis.TokenStream{
		{Term: []byte("a")},
		{Term: []byte("walk")},
		{Term: []byte("in")},
		{Term: []byte("the")},
		{Term: []byte("park")},
	}

	cache := registry.NewCache()
	stopListConfig := map[string]interface{}{
		"type":   tokenmap.Name,
		"tokens": []interface{}{"a", "in", "the"},
	}
	_, err := cache.DefineTokenMap("stop_test", stopListConfig)
	if err != nil {
		b.Fatal(err)
	}

	stopConfig := map[string]interface{}{
		"type":           "stop_tokens",
		"stop_token_map": "stop_test",
	}
	stopFilter, err := cache.DefineTokenFilter("stop_test", stopConfig)
	if err != nil {
		b.Fatal(err)
	}

	// Only the slice positions are rewritten by the filter, not the tokens
	// themselves, so copying the pointers is enough to restore the input.
	scratch := make(analysis.TokenStream, len(inputTokenStream))

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		copy(scratch, inputTokenStream)
		stopFilter.Filter(scratch)
	}
}
================================================
FILE: analysis/token/truncate/truncate.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package truncate
import (
"fmt"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the truncate token filter is registered with
// the registry.
const Name = "truncate_token"
// TruncateTokenFilter is a token filter that shortens terms which have more
// runes than a configured length.
type TruncateTokenFilter struct {
// length is the rune count above which a term gets truncated.
length int
}
// NewTruncateTokenFilter returns a TruncateTokenFilter that truncates terms
// longer than length runes.
func NewTruncateTokenFilter(length int) *TruncateTokenFilter {
	filter := new(TruncateTokenFilter)
	filter.length = length
	return filter
}
// Filter truncates, in place, the term of every token that has more runes
// than the configured length, and returns the same stream. Length is counted
// in runes, not bytes.
func (s *TruncateTokenFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		tok := input[i]
		runes := utf8.RuneCount(tok.Term)
		if runes <= s.length {
			continue
		}
		// drop the surplus runes from the term
		tok.Term = analysis.TruncateRunes(tok.Term, runes-s.length)
	}
	return input
}
// TruncateTokenFilterConstructor builds a TruncateTokenFilter from config.
// The "length" entry must be a float64, which is converted to int; any other
// type or a missing entry yields an error. The cache argument is not used.
func TruncateTokenFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	switch length := config["length"].(type) {
	case float64:
		return NewTruncateTokenFilter(int(length)), nil
	default:
		return nil, fmt.Errorf("must specify length")
	}
}
// init registers the truncate token filter constructor under Name and panics
// if registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, TruncateTokenFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/truncate/truncate_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package truncate
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestTruncateTokenFilter checks truncation of single-token streams for
// ASCII, Japanese and Thai terms, with the length expressed in runes.
func TestTruncateTokenFilter(t *testing.T) {
	// single builds a one-token stream carrying only the given term.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		length int
		input  analysis.TokenStream
		output analysis.TokenStream
	}{
		{
			length: 5,
			input:  single("abcdefgh"),
			output: single("abcde"),
		},
		{
			length: 3,
			input:  single("こんにちは世界"),
			output: single("こんに"),
		},
		{
			length: 10,
			input:  single("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
			output: single("แยกคำภาษาไ"),
		},
	}

	for _, tc := range cases {
		got := NewTruncateTokenFilter(tc.length).Filter(tc.input)
		if !reflect.DeepEqual(got, tc.output) {
			t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
		}
	}
}
================================================
FILE: analysis/token/unicodenorm/unicodenorm.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicodenorm
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
"golang.org/x/text/unicode/norm"
)
const (
	// Name is the name under which the unicode normalization token filter
	// is registered with the registry.
	Name = "normalize_unicode"

	// The names below are the accepted values for the normalization form.

	NFC  = "nfc"
	NFD  = "nfd"
	NFKC = "nfkc"
	NFKD = "nfkd"
)
// forms maps each supported form name to the corresponding norm.Form.
// NewUnicodeNormalizeFilter rejects any name that is not a key of this map.
var forms = map[string]norm.Form{
NFC: norm.NFC,
NFD: norm.NFD,
NFKC: norm.NFKC,
NFKD: norm.NFKD,
}
// UnicodeNormalizeFilter is a token filter that rewrites token terms into one
// unicode normalization form.
type UnicodeNormalizeFilter struct {
// form is the normalization form applied to every term.
form norm.Form
}
// NewUnicodeNormalizeFilter returns a filter for the normalization form with
// the given name. It returns an error when formName is not one of the names
// known to the forms table.
func NewUnicodeNormalizeFilter(formName string) (*UnicodeNormalizeFilter, error) {
	if form, known := forms[formName]; known {
		return &UnicodeNormalizeFilter{form: form}, nil
	}
	return nil, fmt.Errorf("no form named %s", formName)
}
// MustNewUnicodeNormalizeFilter is like NewUnicodeNormalizeFilter but panics
// instead of returning an error.
func MustNewUnicodeNormalizeFilter(formName string) *UnicodeNormalizeFilter {
	filter, err := NewUnicodeNormalizeFilter(formName)
	if err == nil {
		return filter
	}
	panic(err)
}
// Filter replaces the term of every token in input with its normalized form
// and returns the same stream.
func (s *UnicodeNormalizeFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	for i := range input {
		input[i].Term = s.form.Bytes(input[i].Term)
	}
	return input
}
// UnicodeNormalizeFilterConstructor builds a UnicodeNormalizeFilter from
// config. The "form" entry must be a string; it is passed on to
// NewUnicodeNormalizeFilter, whose result and error are returned as is. The
// cache argument is not used.
func UnicodeNormalizeFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	formName, isString := config["form"].(string)
	if !isString {
		return nil, fmt.Errorf("must specify form")
	}
	return NewUnicodeNormalizeFilter(formName)
}
// init registers the unicode normalization filter constructor under Name and
// panics if registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, UnicodeNormalizeFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/unicodenorm/unicodenorm_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicodenorm
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestUnicodeNormalization runs single-token streams through filters for the
// various normalization forms.
//
// The cases come from the Lucene test cases for the CJK width filter, which
// is our basis for using this filter as a substitute for that one.
func TestUnicodeNormalization(t *testing.T) {
	// single builds a one-token stream carrying only the given term.
	single := func(term string) analysis.TokenStream {
		return analysis.TokenStream{
			&analysis.Token{Term: []byte(term)},
		}
	}

	cases := []struct {
		formName string
		input    analysis.TokenStream
		output   analysis.TokenStream
	}{
		{
			formName: NFKD,
			input:    single("Test"),
			output:   single("Test"),
		},
		{
			formName: NFKD,
			input:    single("1234"),
			output:   single("1234"),
		},
		{
			formName: NFKD,
			input:    single("カタカナ"),
			output:   single("カタカナ"),
		},
		{
			formName: NFKC,
			input:    single("ヴィッツ"),
			output:   single("ヴィッツ"),
		},
		{
			formName: NFKC,
			input:    single("パナソニック"),
			output:   single("パナソニック"),
		},
		{
			formName: NFD,
			input:    single("\u212B"),
			output:   single("\u0041\u030A"),
		},
		{
			formName: NFC,
			input:    single("\u212B"),
			output:   single("\u00C5"),
		},
		{
			formName: NFKD,
			input:    single("\uFB01"),
			output:   single("\u0066\u0069"),
		},
		{
			formName: NFKC,
			input:    single("\uFB01"),
			output:   single("\u0066\u0069"),
		},
	}

	for _, tc := range cases {
		got := MustNewUnicodeNormalizeFilter(tc.formName).Filter(tc.input)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		// report the terms both as text and as raw bytes
		t.Errorf("expected %s, got %s", tc.output[0].Term, got[0].Term)
		t.Errorf("expected %#v, got %#v", tc.output[0].Term, got[0].Term)
	}
}
================================================
FILE: analysis/token/unique/unique.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unique
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the unique term filter is registered with the
// registry.
const Name = "unique"
// UniqueTermFilter retains only the tokens which mark the first occurrence of
// a term. Tokens whose term appears in a preceding token are dropped.
//
// The type has no fields; the set of terms already seen is built afresh
// inside each call to Filter.
type UniqueTermFilter struct{}
// NewUniqueTermFilter returns a new UniqueTermFilter.
func NewUniqueTermFilter() *UniqueTermFilter {
	return new(UniqueTermFilter)
}
// Filter keeps the first token for each distinct term and drops every later
// token with the same term. Terms are compared byte for byte. The survivors
// are compacted to the front of input's backing array in their original
// order, and the shortened stream is returned.
func (f *UniqueTermFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
	seen := make(map[string]struct{}, len(input)/4)
	// unique shares input's backing array; it never grows past the read index.
	unique := input[:0]
	for _, token := range input {
		key := string(token.Term)
		if _, duplicate := seen[key]; !duplicate {
			seen[key] = struct{}{}
			unique = append(unique, token)
		}
	}
	return unique
}
// UniqueTermFilterConstructor returns a new UniqueTermFilter. The filter
// takes no configuration, so config and cache are ignored and the returned
// error is always nil.
func UniqueTermFilterConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenFilter, error) {
	return &UniqueTermFilter{}, nil
}
// init registers the unique term filter constructor under Name and panics if
// registration fails.
func init() {
	if err := registry.RegisterTokenFilter(Name, UniqueTermFilterConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/token/unique/unique_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unique
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestUniqueTermFilter checks that only the first token of each distinct term
// survives the filter and that the surviving tokens are the very same tokens
// that were passed in.
func TestUniqueTermFilter(t *testing.T) {
	type testCase struct {
		input analysis.TokenStream
		// expected indices of input which should be included in the output.
		// We use indices instead of another TokenStream, since
		// position/start/end should be preserved.
		expectedIndices []int
	}

	cases := []testCase{
		{tokenStream(), []int{}},
		{tokenStream("a"), []int{0}},
		{
			tokenStream("each", "term", "in", "this", "sentence", "is", "unique"),
			[]int{0, 1, 2, 3, 4, 5, 6},
		},
		{
			tokenStream("Lui", "è", "alto", "e", "lei", "è", "bassa"),
			[]int{0, 1, 2, 3, 4, 6},
		},
		{
			tokenStream("a", "a", "A", "a", "a", "A"),
			[]int{0, 2},
		},
	}

	filter := NewUniqueTermFilter()
	for _, tc := range cases {
		// The expectation has to be captured before filtering, because the
		// filter rearranges its input in place.
		want := subStream(tc.input, tc.expectedIndices)
		got := filter.Filter(tc.input)
		if !reflect.DeepEqual(got, want) {
			t.Errorf("expected %s \n\n got %s", want, got)
		}
	}
}
// tokenStream builds a token stream with one token per given term. Positions
// count up from 1, and each token's Start/End byte offsets follow directly on
// from the previous token's End, with no gap between terms.
func tokenStream(termStrs ...string) analysis.TokenStream {
	stream := make(analysis.TokenStream, 0, len(termStrs))
	offset := 0
	for _, termStr := range termStrs {
		next := offset + len(termStr)
		stream = append(stream, &analysis.Token{
			Term:     []byte(termStr),
			Position: len(stream) + 1,
			Start:    offset,
			End:      next,
		})
		offset = next
	}
	return stream
}
// subStream returns a new stream holding the tokens of stream found at the
// given indices, in the order the indices are listed. The tokens themselves
// are shared with stream, not copied.
func subStream(stream analysis.TokenStream, indices []int) analysis.TokenStream {
	picked := make(analysis.TokenStream, 0, len(indices))
	for _, index := range indices {
		picked = append(picked, stream[index])
	}
	return picked
}
================================================
FILE: analysis/tokenizer/character/character.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package character
import (
"unicode/utf8"
"github.com/blevesearch/bleve/v2/analysis"
)
// IsTokenRune reports whether the rune r is part of a token. The
// CharacterTokenizer treats runes for which it returns false as separators.
type IsTokenRune func(r rune) bool
// CharacterTokenizer splits input into tokens made of consecutive runes that
// are accepted by a rune classification function.
type CharacterTokenizer struct {
// isTokenRun decides, rune by rune, what belongs to a token.
isTokenRun IsTokenRune
}
// NewCharacterTokenizer returns a CharacterTokenizer that uses f to decide
// which runes belong to tokens.
func NewCharacterTokenizer(f IsTokenRune) *CharacterTokenizer {
	tokenizer := new(CharacterTokenizer)
	tokenizer.isTokenRun = f
	return tokenizer
}
// Tokenize splits input into tokens. A token is a maximal run of consecutive
// runes accepted by the tokenizer's IsTokenRune function; every other rune
// acts as a separator and is not part of any token.
//
// Each token's Term is a sub-slice of input (not a copy), Start and End are
// byte offsets into input, Position counts tokens from 1, and Type is
// analysis.AlphaNumeric.
//
// The whole input is always scanned. utf8.DecodeRune yields utf8.RuneError
// both for invalid bytes and for a validly encoded U+FFFD, so that value
// cannot serve as an end-of-input marker; such runes are simply handed to the
// IsTokenRune function like any other rune.
func (c *CharacterTokenizer) Tokenize(input []byte) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0, 1024)

	start := 0 // byte offset where the current candidate token begins
	end := 0   // byte offset just past the last token rune seen
	count := 0 // number of tokens emitted so far

	// emit appends input[start:end] as a token if that span is not empty.
	emit := func() {
		if end <= start {
			return
		}
		rv = append(rv, &analysis.Token{
			Term:     input[start:end],
			Start:    start,
			End:      end,
			Position: count + 1,
			Type:     analysis.AlphaNumeric,
		})
		count++
	}

	// DecodeRune reports a size of at least 1 for non-empty input, so the
	// offset always advances and the loop terminates.
	for offset := 0; offset < len(input); {
		currRune, size := utf8.DecodeRune(input[offset:])
		if c.isTokenRun(currRune) {
			end = offset + size
		} else {
			emit()
			// the next token can start right after this separator
			start = offset + size
			end = start
		}
		offset += size
	}

	// if we ended in the middle of a token, finish it
	emit()

	return rv
}
================================================
FILE: analysis/tokenizer/character/character_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package character
import (
"reflect"
"testing"
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestCharacterTokenizer tokenizes two inputs with a letters-only tokenizer
// and checks terms, byte offsets, positions and types of the tokens.
func TestCharacterTokenizer(t *testing.T) {
	type testCase struct {
		input  []byte
		output analysis.TokenStream
	}

	cases := []testCase{
		{
			input: []byte("Hello World."),
			output: analysis.TokenStream{
				{Term: []byte("Hello"), Start: 0, End: 5, Position: 1, Type: analysis.AlphaNumeric},
				{Term: []byte("World"), Start: 6, End: 11, Position: 2, Type: analysis.AlphaNumeric},
			},
		},
		{
			input: []byte("dominique@mcdiabetes.com"),
			output: analysis.TokenStream{
				{Term: []byte("dominique"), Start: 0, End: 9, Position: 1, Type: analysis.AlphaNumeric},
				{Term: []byte("mcdiabetes"), Start: 10, End: 20, Position: 2, Type: analysis.AlphaNumeric},
				{Term: []byte("com"), Start: 21, End: 24, Position: 3, Type: analysis.AlphaNumeric},
			},
		},
	}

	letters := NewCharacterTokenizer(unicode.IsLetter)
	for _, tc := range cases {
		got := letters.Tokenize(tc.input)
		if !reflect.DeepEqual(got, tc.output) {
			t.Errorf("Expected %v, got %v for %s", tc.output, got, string(tc.input))
		}
	}
}
================================================
FILE: analysis/tokenizer/exception/exception.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package exception implements a Tokenizer which extracts pieces matched by a
// regular expression from the input data, delegates the rest to another
// tokenizer, then inserts the extracted parts back into the token stream. Use
// it to preserve sequences which a regular tokenizer would alter or remove.
//
// Its constructor takes the following arguments:
//
// "exceptions" ([]string): one or more Go regular expressions matching the
// sequence to preserve. Multiple expressions are combined with "|".
//
// "tokenizer" (string): the name of the tokenizer processing the data not
// matched by "exceptions".
package exception
import (
"fmt"
"regexp"
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which the exceptions tokenizer is registered with
// the registry.
const Name = "exception"
// ExceptionsTokenizer turns every match of a regular expression into a single
// token and hands the input between matches to another tokenizer.
type ExceptionsTokenizer struct {
// exception matches the sequences that become one token each.
exception *regexp.Regexp
// remaining tokenizes the parts of the input not matched by exception.
remaining analysis.Tokenizer
}
// NewExceptionsTokenizer returns an ExceptionsTokenizer that emits each match
// of exception as one token and delegates everything else to remaining.
func NewExceptionsTokenizer(exception *regexp.Regexp, remaining analysis.Tokenizer) *ExceptionsTokenizer {
	tokenizer := new(ExceptionsTokenizer)
	tokenizer.exception = exception
	tokenizer.remaining = remaining
	return tokenizer
}
// Tokenize splits input around the matches of the exception pattern. Every
// match becomes a single token whose Term is the matched bytes. The stretches
// before, between and after the matches are given to the remaining tokenizer,
// and the tokens it produces have their Start, End and Position shifted so
// that they are relative to the whole input and stream.
func (t *ExceptionsTokenizer) Tokenize(input []byte) analysis.TokenStream {
	rv := make(analysis.TokenStream, 0)

	// delegate runs the remaining tokenizer over input[from:to], shifts the
	// resulting tokens by the byte offset from and the position offset
	// posBase, appends them to rv, and reports how many tokens it added.
	delegate := func(from, to, posBase int) int {
		sub := t.remaining.Tokenize(input[from:to])
		for _, tok := range sub {
			tok.Position += posBase
			tok.Start += from
			tok.End += from
			rv = append(rv, tok)
		}
		return len(sub)
	}

	cursor := 0  // first byte of input not yet turned into tokens
	lastPos := 0 // position of the most recently emitted token
	for _, loc := range t.exception.FindAllIndex(input, -1) {
		matchStart, matchEnd := loc[0], loc[1]

		// tokenize whatever lies between the previous match and this one
		if matchStart > cursor {
			added := delegate(cursor, matchStart, lastPos)
			lastPos += added
		}

		// the match itself is kept whole as one token
		rv = append(rv, &analysis.Token{
			Term:     input[matchStart:matchEnd],
			Start:    matchStart,
			End:      matchEnd,
			Position: lastPos + 1,
		})
		lastPos++
		cursor = matchEnd
	}

	// tokenize the tail after the last match
	if cursor < len(input) {
		delegate(cursor, len(input), lastPos)
	}

	return rv
}
// ExceptionsTokenizerConstructor builds an ExceptionsTokenizer from config.
//
// config["exceptions"] must hold at least one Go regular expression, given
// either as []interface{} (entries that are not strings are ignored) or as
// []string. The expressions are joined with "|" into a single pattern.
// config["tokenizer"] must be the name of a tokenizer obtainable from cache;
// it processes the input not matched by the exceptions.
//
// An error is returned when no pattern is supplied, when the combined pattern
// does not compile, when "tokenizer" is missing or not a string, or when cache
// cannot provide the named tokenizer.
func ExceptionsTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	var exceptions []string
	switch patterns := config["exceptions"].(type) {
	case []interface{}:
		for _, pattern := range patterns {
			if s, ok := pattern.(string); ok {
				exceptions = append(exceptions, s)
			}
		}
	case []string:
		exceptions = append(exceptions, patterns...)
	}
	if len(exceptions) == 0 {
		return nil, fmt.Errorf("no pattern found in 'exceptions' property")
	}

	r, err := regexp.Compile(strings.Join(exceptions, "|"))
	if err != nil {
		return nil, fmt.Errorf("unable to build exception tokenizer: %w", err)
	}

	remainingName, ok := config["tokenizer"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify tokenizer for remaining input")
	}
	remaining, err := cache.TokenizerNamed(remainingName)
	if err != nil {
		return nil, err
	}
	return NewExceptionsTokenizer(r, remaining), nil
}
// init registers ExceptionsTokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, ExceptionsTokenizerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/tokenizer/exception/exception_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package exception
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
// TestExceptionsTokenizer defines an "exception" tokenizer from each case's
// config through a fresh registry cache (with "unicode" handling the remaining
// input) and checks that URLs and e-mail addresses matched by the exception
// patterns come back as single tokens with the expected offsets and positions.
func TestExceptionsTokenizer(t *testing.T) {
tests := []struct {
config map[string]interface{}
input []byte
// patterns is not set by any case below; it is only referenced by the
// commented-out code further down.
patterns []string
result analysis.TokenStream
}{
{
input: []byte("test http://blevesearch.com/ words"),
config: map[string]interface{}{
"type": "exception",
"tokenizer": "unicode",
"exceptions": []interface{}{
`[hH][tT][tT][pP][sS]?://(\S)*`,
`[fF][iI][lL][eE]://(\S)*`,
`[fF][tT][pP]://(\S)*`,
},
},
result: analysis.TokenStream{
&analysis.Token{
Term: []byte("test"),
Position: 1,
Start: 0,
End: 4,
},
&analysis.Token{
Term: []byte("http://blevesearch.com/"),
Position: 2,
Start: 5,
End: 28,
},
&analysis.Token{
Term: []byte("words"),
Position: 3,
Start: 29,
End: 34,
},
},
},
{
input: []byte("what ftp://blevesearch.com/ songs"),
config: map[string]interface{}{
"type": "exception",
"tokenizer": "unicode",
"exceptions": []interface{}{
`[hH][tT][tT][pP][sS]?://(\S)*`,
`[fF][iI][lL][eE]://(\S)*`,
`[fF][tT][pP]://(\S)*`,
},
},
result: analysis.TokenStream{
&analysis.Token{
Term: []byte("what"),
Position: 1,
Start: 0,
End: 4,
},
&analysis.Token{
Term: []byte("ftp://blevesearch.com/"),
Position: 2,
Start: 5,
End: 27,
},
&analysis.Token{
Term: []byte("songs"),
Position: 3,
Start: 28,
End: 33,
},
},
},
{
input: []byte("please email marty@couchbase.com the URL https://blevesearch.com/"),
config: map[string]interface{}{
"type": "exception",
"tokenizer": "unicode",
"exceptions": []interface{}{
`[hH][tT][tT][pP][sS]?://(\S)*`,
`[fF][iI][lL][eE]://(\S)*`,
`[fF][tT][pP]://(\S)*`,
`\S+@\S+`,
},
},
result: analysis.TokenStream{
&analysis.Token{
Term: []byte("please"),
Position: 1,
Start: 0,
End: 6,
},
&analysis.Token{
Term: []byte("email"),
Position: 2,
Start: 7,
End: 12,
},
&analysis.Token{
Term: []byte("marty@couchbase.com"),
Position: 3,
Start: 13,
End: 32,
},
&analysis.Token{
Term: []byte("the"),
Position: 4,
Start: 33,
End: 36,
},
&analysis.Token{
Term: []byte("URL"),
Position: 5,
Start: 37,
End: 40,
},
&analysis.Token{
Term: []byte("https://blevesearch.com/"),
Position: 6,
Start: 41,
End: 65,
},
},
},
}
// remaining := unicode.NewUnicodeTokenizer()
for _, test := range tests {
// build the requested exception tokenizer
cache := registry.NewCache()
tokenizer, err := cache.DefineTokenizer("custom", test.config)
if err != nil {
t.Fatal(err)
}
// pattern := strings.Join(test.patterns, "|")
// r, err := regexp.Compile(pattern)
// if err != nil {
// t.Fatal(err)
// }
// tokenizer := NewExceptionsTokenizer(r, remaining)
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %v, got %v", test.result, actual)
}
}
}
================================================
FILE: analysis/tokenizer/letter/letter.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package letter
import (
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/character"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this tokenizer's constructor is registered.
const Name = "letter"
// TokenizerConstructor returns a character tokenizer driven by
// unicode.IsLetter. Neither config nor cache is consulted, and the returned
// error is always nil.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	letters := character.NewCharacterTokenizer(unicode.IsLetter)
	return letters, nil
}
// init registers TokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, TokenizerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/tokenizer/regexp/regexp.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp
import (
"fmt"
"regexp"
"strconv"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this tokenizer's constructor is registered.
const Name = "regexp"
// IdeographRegexp matches a single character from the Han, Hangul, Hiragana
// or Katakana scripts. detectTokenType uses it to classify terms as
// Ideographic.
var IdeographRegexp = regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}`)
// RegexpTokenizer produces one token for each non-empty match of a regular
// expression.
type RegexpTokenizer struct {
// r is the pattern whose matches become tokens.
r *regexp.Regexp
}
// NewRegexpTokenizer returns a RegexpTokenizer that emits the matches of r.
func NewRegexpTokenizer(r *regexp.Regexp) *RegexpTokenizer {
	tokenizer := new(RegexpTokenizer)
	tokenizer.r = r
	return tokenizer
}
// Tokenize returns one token per non-empty match of the tokenizer's pattern
// in input. Start and End are byte offsets into input, Term aliases the
// matched bytes, and Type comes from detectTokenType.
//
// Positions are numbered consecutively from 1 over the tokens actually
// emitted. Zero-length matches are dropped without consuming a position, so a
// pattern that can match the empty string does not leave gaps between
// neighbouring tokens.
func (rt *RegexpTokenizer) Tokenize(input []byte) analysis.TokenStream {
	matches := rt.r.FindAllIndex(input, -1)
	rv := make(analysis.TokenStream, 0, len(matches))
	for _, match := range matches {
		start, end := match[0], match[1]
		if start == end {
			// an empty match would yield an empty term
			continue
		}
		term := input[start:end]
		rv = append(rv, &analysis.Token{
			Term:     term,
			Start:    start,
			End:      end,
			Position: len(rv) + 1, // count only tokens that were emitted
			Type:     detectTokenType(term),
		})
	}
	return rv
}
// RegexpTokenizerConstructor builds a RegexpTokenizer from config["regexp"],
// which must be a string holding a Go regular expression. It returns an error
// when the entry is missing, is not a string, or does not compile. cache is
// not used.
func RegexpTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	if pattern, isString := config["regexp"].(string); isString {
		compiled, err := regexp.Compile(pattern)
		if err != nil {
			return nil, fmt.Errorf("unable to build regexp tokenizer: %v", err)
		}
		return NewRegexpTokenizer(compiled), nil
	}
	return nil, fmt.Errorf("must specify regexp")
}
// init registers RegexpTokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, RegexpTokenizerConstructor); err != nil {
		panic(err)
	}
}
// detectTokenType classifies a term: Ideographic when IdeographRegexp matches
// anywhere in it, otherwise Numeric when strconv.ParseFloat accepts the whole
// term as a 64-bit float, otherwise AlphaNumeric.
func detectTokenType(termBytes []byte) analysis.TokenType {
	if IdeographRegexp.Match(termBytes) {
		return analysis.Ideographic
	}
	if _, parseErr := strconv.ParseFloat(string(termBytes), 64); parseErr != nil {
		return analysis.AlphaNumeric
	}
	return analysis.Numeric
}
================================================
FILE: analysis/tokenizer/regexp/regexp_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package regexp
import (
"reflect"
"regexp"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestBoundary runs a RegexpTokenizer whose pattern matches either a single
// Han/Hangul/Hiragana/Katakana character or a run of \w characters against
// English text, Japanese text and empty input, and compares the resulting
// token streams (offsets, terms, positions and types) with the expected ones.
func TestBoundary(t *testing.T) {
wordRegex := regexp.MustCompile(`\p{Han}|\p{Hangul}|\p{Hiragana}|\p{Katakana}|\w+`)
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
[]byte("Hello World."),
analysis.TokenStream{
{
Start: 0,
End: 5,
Term: []byte("Hello"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 6,
End: 11,
Term: []byte("World"),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
// each character below occupies three bytes, hence the offsets
[]byte("こんにちは世界"),
analysis.TokenStream{
{
Start: 0,
End: 3,
Term: []byte("こ"),
Position: 1,
Type: analysis.Ideographic,
},
{
Start: 3,
End: 6,
Term: []byte("ん"),
Position: 2,
Type: analysis.Ideographic,
},
{
Start: 6,
End: 9,
Term: []byte("に"),
Position: 3,
Type: analysis.Ideographic,
},
{
Start: 9,
End: 12,
Term: []byte("ち"),
Position: 4,
Type: analysis.Ideographic,
},
{
Start: 12,
End: 15,
Term: []byte("は"),
Position: 5,
Type: analysis.Ideographic,
},
{
Start: 15,
End: 18,
Term: []byte("世"),
Position: 6,
Type: analysis.Ideographic,
},
{
Start: 18,
End: 21,
Term: []byte("界"),
Position: 7,
Type: analysis.Ideographic,
},
},
},
{
// empty input must give an empty, non-nil stream
[]byte(""),
analysis.TokenStream{},
},
}
for _, test := range tests {
tokenizer := NewRegexpTokenizer(wordRegex)
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
}
}
}
func TestBugProducingEmptyTokens(t *testing.T) {
wordRegex := regexp.MustCompile(`[0-9a-zA-Z_]*`)
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
[]byte("Chatha Edwards Sr."),
analysis.TokenStream{
{
Start: 0,
End: 6,
Term: []byte("Chatha"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 7,
End: 14,
Term: []byte("Edwards"),
Position: 2,
Type: analysis.AlphaNumeric,
},
{
Start: 15,
End: 17,
Term: []byte("Sr"),
Position: 3,
Type: analysis.AlphaNumeric,
},
},
},
}
for _, test := range tests {
tokenizer := NewRegexpTokenizer(wordRegex)
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
}
}
}
================================================
FILE: analysis/tokenizer/single/single.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package single
import (
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this tokenizer's constructor is registered.
const Name = "single"
// SingleTokenTokenizer returns its whole input as one token. It has no state.
type SingleTokenTokenizer struct {
}
// NewSingleTokenTokenizer returns a new SingleTokenTokenizer.
func NewSingleTokenTokenizer() *SingleTokenTokenizer {
	return new(SingleTokenTokenizer)
}
// Tokenize wraps the entire input in exactly one AlphaNumeric token at
// position 1 spanning bytes 0 to len(input). The token's Term aliases input.
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
	whole := &analysis.Token{
		Type:     analysis.AlphaNumeric,
		Term:     input,
		Start:    0,
		End:      len(input),
		Position: 1,
	}
	return analysis.TokenStream{whole}
}
// SingleTokenTokenizerConstructor returns a new SingleTokenTokenizer. Neither
// config nor cache is consulted, and the returned error is always nil.
func SingleTokenTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	tokenizer := NewSingleTokenTokenizer()
	return tokenizer, nil
}
// init registers SingleTokenTokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, SingleTokenTokenizerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/tokenizer/single/single_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package single
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestSingleTokenTokenizer checks that English, Japanese and Thai inputs each
// come back as a single AlphaNumeric token at position 1 that spans the whole
// input; End is therefore the input's length in bytes.
func TestSingleTokenTokenizer(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
[]byte("Hello World"),
analysis.TokenStream{
{
Start: 0,
End: 11,
Term: []byte("Hello World"),
Position: 1,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("こんにちは世界"),
analysis.TokenStream{
{
Start: 0,
End: 21,
Term: []byte("こんにちは世界"),
Position: 1,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
analysis.TokenStream{
{
Start: 0,
End: 72,
Term: []byte("แยกคำภาษาไทยก็ทำได้นะจ้ะ"),
Position: 1,
Type: analysis.AlphaNumeric,
},
},
},
}
for _, test := range tests {
tokenizer := NewSingleTokenTokenizer()
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
}
}
}
================================================
FILE: analysis/tokenizer/unicode/unicode.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicode
import (
"github.com/blevesearch/segment"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this tokenizer's constructor is registered.
const Name = "unicode"
// UnicodeTokenizer tokenizes input with the word segmenter from the segment
// package. It has no state.
type UnicodeTokenizer struct {
}
// NewUnicodeTokenizer returns a new UnicodeTokenizer.
func NewUnicodeTokenizer() *UnicodeTokenizer {
	return new(UnicodeTokenizer)
}
// Tokenize walks input with a word segmenter and returns one token for every
// segment whose type is not segment.None. Start and End are byte offsets into
// input, positions count up from 1, Term is the segment's bytes as returned by
// the segmenter, and Type is derived from the segment type by convertType.
//
// Tokens are taken from batch-allocated []analysis.Token slices (ta), and the
// result is gathered in chunks (rv, moved into rvx whenever it fills up) that
// are concatenated once at the end - presumably to keep the number of
// allocations low.
func (rt *UnicodeTokenizer) Tokenize(input []byte) analysis.TokenStream {
rvx := make([]analysis.TokenStream, 0, 10) // When rv gets full, append to rvx.
rv := make(analysis.TokenStream, 0, 1)
// ta is the current batch of token structs; taNext indexes the next unused one.
ta := []analysis.Token(nil)
taNext := 0
segmenter := segment.NewWordSegmenterDirect(input)
// start is the byte offset of the current segment; pos is the next token position.
start := 0
pos := 1
// guessRemaining estimates how many segments follow byte offset end by
// dividing the bytes left by an average segment length. The average is
// computed from len(rv), i.e. only the tokens in the current chunk.
guessRemaining := func(end int) int {
avgSegmentLen := end / (len(rv) + 1)
if avgSegmentLen < 1 {
avgSegmentLen = 1
}
remainingLen := len(input) - end
return remainingLen / avgSegmentLen
}
for segmenter.Segment() {
segmentBytes := segmenter.Bytes()
end := start + len(segmentBytes)
if segmenter.Type() != segment.None {
if taNext >= len(ta) {
// The batch is used up: allocate a new one sized by the estimate,
// clamped to the range 1..1000.
remainingSegments := guessRemaining(end)
if remainingSegments > 1000 {
remainingSegments = 1000
}
if remainingSegments < 1 {
remainingSegments = 1
}
ta = make([]analysis.Token, remainingSegments)
taNext = 0
}
token := &ta[taNext]
taNext++
token.Term = segmentBytes
token.Start = start
token.End = end
token.Position = pos
token.Type = convertType(segmenter.Type())
if len(rv) >= cap(rv) { // When rv is full, save it into rvx.
rvx = append(rvx, rv)
// Chunk capacity doubles each time, up to 256.
rvCap := cap(rv) * 2
if rvCap > 256 {
rvCap = 256
}
rv = make(analysis.TokenStream, 0, rvCap) // Next rv cap is bigger.
}
rv = append(rv, token)
pos++
}
// Advance past this segment whether or not it produced a token.
start = end
}
if len(rvx) > 0 {
// Several chunks were filled: copy them, then the current chunk, into one
// stream of exactly the required capacity.
n := len(rv)
for _, r := range rvx {
n += len(r)
}
rall := make(analysis.TokenStream, 0, n)
for _, r := range rvx {
rall = append(rall, r...)
}
return append(rall, rv...)
}
return rv
}
// UnicodeTokenizerConstructor returns a new UnicodeTokenizer. Neither config
// nor cache is consulted, and the returned error is always nil.
func UnicodeTokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	tokenizer := NewUnicodeTokenizer()
	return tokenizer, nil
}
// init registers UnicodeTokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, UnicodeTokenizerConstructor); err != nil {
		panic(err)
	}
}
// convertType maps a segment word type to a token type: segment.Ideo and
// segment.Kana become Ideographic, segment.Number becomes Numeric, and every
// other value becomes AlphaNumeric.
func convertType(segmentWordType int) analysis.TokenType {
	switch segmentWordType {
	case segment.Ideo, segment.Kana:
		return analysis.Ideographic
	case segment.Number:
		return analysis.Numeric
	default:
		return analysis.AlphaNumeric
	}
}
================================================
FILE: analysis/tokenizer/unicode/unicode_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package unicode
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/segment"
)
// TestUnicode tokenizes a set of inputs with UnicodeTokenizer and compares the
// complete token streams with the expected ones. The cases cover plain English
// words, a word containing an apostrophe, Japanese text split into
// single-character Ideographic tokens, a number typed as Numeric, and a single
// Katakana character.
func TestUnicode(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
[]byte("Hello World"),
analysis.TokenStream{
{
Start: 0,
End: 5,
Term: []byte("Hello"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 6,
End: 11,
Term: []byte("World"),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
// the apostrophe does not split the word
[]byte("steven's"),
analysis.TokenStream{
{
Start: 0,
End: 8,
Term: []byte("steven's"),
Position: 1,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("こんにちは世界"),
analysis.TokenStream{
{
Start: 0,
End: 3,
Term: []byte("こ"),
Position: 1,
Type: analysis.Ideographic,
},
{
Start: 3,
End: 6,
Term: []byte("ん"),
Position: 2,
Type: analysis.Ideographic,
},
{
Start: 6,
End: 9,
Term: []byte("に"),
Position: 3,
Type: analysis.Ideographic,
},
{
Start: 9,
End: 12,
Term: []byte("ち"),
Position: 4,
Type: analysis.Ideographic,
},
{
Start: 12,
End: 15,
Term: []byte("は"),
Position: 5,
Type: analysis.Ideographic,
},
{
Start: 15,
End: 18,
Term: []byte("世"),
Position: 6,
Type: analysis.Ideographic,
},
{
Start: 18,
End: 21,
Term: []byte("界"),
Position: 7,
Type: analysis.Ideographic,
},
},
},
{
[]byte("age 25"),
analysis.TokenStream{
{
Start: 0,
End: 3,
Term: []byte("age"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 4,
End: 6,
Term: []byte("25"),
Position: 2,
Type: analysis.Numeric,
},
},
},
{
[]byte("カ"),
analysis.TokenStream{
{
Start: 0,
End: 3,
Term: []byte("カ"),
Position: 1,
Type: analysis.Ideographic,
},
},
},
}
for _, test := range tests {
tokenizer := NewUnicodeTokenizer()
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
}
}
}
// sampleLargeInput is a multi-paragraph English text used as the input of
// BenchmarkTokenizeEnglishText.
var sampleLargeInput = []byte(`There are three characteristics of liquids which are relevant to the discussion of a BLEVE:
If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed).
The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example.
When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did.
Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, oxygen, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures.
If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.`)
// BenchmarkTokenizeEnglishText measures UnicodeTokenizer.Tokenize on
// sampleLargeInput; the tokenizer is created before the timer is reset.
func BenchmarkTokenizeEnglishText(b *testing.B) {
	tokenizer := NewUnicodeTokenizer()
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		tokenizer.Tokenize(sampleLargeInput)
	}
}
// TestConvertType checks the token type convertType returns for the Ideo,
// Kana, Number and Letter segment types.
func TestConvertType(t *testing.T) {
	cases := []struct {
		segmentType int
		want        analysis.TokenType
	}{
		{segmentType: segment.Ideo, want: analysis.Ideographic},
		{segmentType: segment.Kana, want: analysis.Ideographic},
		{segmentType: segment.Number, want: analysis.Numeric},
		{segmentType: segment.Letter, want: analysis.AlphaNumeric},
	}
	for i := range cases {
		got := convertType(cases[i].segmentType)
		if got == cases[i].want {
			continue
		}
		t.Errorf("expected %d, got %d for %d", cases[i].want, got, cases[i].segmentType)
	}
}
================================================
FILE: analysis/tokenizer/web/web.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package web
import (
"regexp"
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/exception"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this tokenizer's constructor is registered.
const Name = "web"
// email is a pattern for e-mail addresses: a dot-separated or quoted local
// part, "@", and either a domain name or a bracketed address literal.
var email = `(?:[a-z0-9!#$%&'*+/=?^_` + "`" + `{|}~-]+(?:\.[a-z0-9!#$%&'*+/=?^_` + "`" + `{|}~-]+)*|"(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21\x23-\x5b\x5d-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])*")@(?:(?:[a-z0-9](?:[a-z0-9-]*[a-z0-9])?\.)+[a-z0-9](?:[a-z0-9-]*[a-z0-9])?|\[(?:(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.){3}(?:25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?|[a-z0-9-]*[a-z0-9]:(?:[\x01-\x08\x0b\x0c\x0e-\x1f\x21-\x5a\x53-\x7f]|\\[\x01-\x09\x0b\x0c\x0e-\x7f])+)\])`
// url is a pattern for URLs; it begins with the case-insensitive flag (?i).
var url = `(?i)\b((?:[a-z][\w-]+:(?:/{1,3}|[a-z0-9%])|www\d{0,3}[.]|[a-z0-9.\-]+[.][a-z]{2,4}/)(?:[^\s()<>]+|\(([^\s()<>]+|(\([^\s()<>]+\)))*\))+(?:\(([^\s()<>]+|(\([^\s()<>]+\)))*\)|[^\s` + "`" + `!()\[\]{};:'".,<>?«»“”‘’]))`
// twitterHandle matches "@" followed by 1 to 15 ASCII letters, digits or
// underscores.
var twitterHandle = `@([a-zA-Z0-9_]){1,15}`
// twitterHashtag matches "#" followed by one or more ASCII letters, digits or
// underscores.
var twitterHashtag = `#([a-zA-Z0-9_])+`
// exceptions lists the patterns whose matches are kept as single tokens, in
// the order in which they are combined.
var exceptions = []string{email, url, twitterHandle, twitterHashtag}
// exceptionsRegexp is the alternation of all exceptions, compiled once at
// package initialisation; MustCompile panics if the combined pattern is
// invalid.
var exceptionsRegexp = regexp.MustCompile(strings.Join(exceptions, "|"))
// TokenizerConstructor returns an exceptions tokenizer that keeps matches of
// exceptionsRegexp as single tokens and passes the rest of the input to the
// tokenizer registered under unicode.Name. config is not used. An error is
// returned when cache cannot provide that tokenizer.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	fallback, err := cache.TokenizerNamed(unicode.Name)
	if err == nil {
		return exception.NewExceptionsTokenizer(exceptionsRegexp, fallback), nil
	}
	return nil, err
}
// init registers TokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, TokenizerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/tokenizer/web/web_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package web
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// TestWeb obtains the "web" tokenizer from a fresh registry cache and checks
// that an e-mail address, a URL, a Twitter handle and a hashtag each come back
// as a single token, while the surrounding words are tokenized normally.
func TestWeb(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
[]byte("Hello info@blevesearch.com"),
analysis.TokenStream{
{
Start: 0,
End: 5,
Term: []byte("Hello"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 6,
End: 26,
Term: []byte("info@blevesearch.com"),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("That http://blevesearch.com"),
analysis.TokenStream{
{
Start: 0,
End: 4,
Term: []byte("That"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 5,
End: 27,
Term: []byte("http://blevesearch.com"),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("Hey @blevesearch"),
analysis.TokenStream{
{
Start: 0,
End: 3,
Term: []byte("Hey"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 4,
End: 16,
Term: []byte("@blevesearch"),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("This #bleve"),
analysis.TokenStream{
{
Start: 0,
End: 4,
Term: []byte("This"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 5,
End: 11,
Term: []byte("#bleve"),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
// the trailing "?" is not part of the handle and yields no token
[]byte("What about @blevesearch?"),
analysis.TokenStream{
{
Start: 0,
End: 4,
Term: []byte("What"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 5,
End: 10,
Term: []byte("about"),
Position: 2,
Type: analysis.AlphaNumeric,
},
{
Start: 11,
End: 23,
Term: []byte("@blevesearch"),
Position: 3,
Type: analysis.AlphaNumeric,
},
},
},
}
cache := registry.NewCache()
tokenizer, err := cache.TokenizerNamed(Name)
if err != nil {
t.Fatal(err)
}
for _, test := range tests {
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
}
}
}
================================================
FILE: analysis/tokenizer/whitespace/whitespace.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package whitespace
import (
"unicode"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/character"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which this tokenizer's constructor is registered.
const Name = "whitespace"
// TokenizerConstructor returns a character tokenizer driven by notSpace.
// Neither config nor cache is consulted, and the returned error is always nil.
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
	nonSpaceRuns := character.NewCharacterTokenizer(notSpace)
	return nonSpaceRuns, nil
}
// notSpace reports whether r is anything other than a white space character
// as defined by unicode.IsSpace.
func notSpace(r rune) bool {
	isSpace := unicode.IsSpace(r)
	return !isSpace
}
// init registers TokenizerConstructor under Name and panics if the
// registration returns an error.
func init() {
	if err := registry.RegisterTokenizer(Name, TokenizerConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/tokenizer/whitespace/whitespace_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package whitespace
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/character"
)
// TestBoundary runs a character tokenizer built on notSpace and checks that
// input is split only at white space: punctuation stays attached to its word,
// text without spaces is one token, and empty input yields an empty stream.
func TestBoundary(t *testing.T) {
tests := []struct {
input []byte
output analysis.TokenStream
}{
{
// the trailing "." stays part of "World."
[]byte("Hello World."),
analysis.TokenStream{
{
Start: 0,
End: 5,
Term: []byte("Hello"),
Position: 1,
Type: analysis.AlphaNumeric,
},
{
Start: 6,
End: 12,
Term: []byte("World."),
Position: 2,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte("こんにちは世界"),
analysis.TokenStream{
{
Start: 0,
End: 21,
Term: []byte("こんにちは世界"),
Position: 1,
Type: analysis.AlphaNumeric,
},
},
},
{
[]byte(""),
analysis.TokenStream{},
},
{
// mixed single-byte and multi-byte characters; End is a byte offset
[]byte("abc界"),
analysis.TokenStream{
{
Start: 0,
End: 6,
Term: []byte("abc界"),
Position: 1,
Type: analysis.AlphaNumeric,
},
},
},
}
for _, test := range tests {
tokenizer := character.NewCharacterTokenizer(notSpace)
actual := tokenizer.Tokenize(test.input)
if !reflect.DeepEqual(actual, test.output) {
t.Errorf("Expected %v, got %v for %s", test.output, actual, string(test.input))
}
}
}
// sampleLargeInput is a multi-paragraph English text used as the input of
// BenchmarkTokenizeEnglishText.
var sampleLargeInput = []byte(`There are three characteristics of liquids which are relevant to the discussion of a BLEVE:
If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed).
The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example.
When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did.
Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, oxygen, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures.
If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.`)
// BenchmarkTokenizeEnglishText measures tokenizing sampleLargeInput with a
// character tokenizer driven by notSpace. The tokenizer is built before the
// timer is reset, so only the Tokenize calls are timed.
func BenchmarkTokenizeEnglishText(b *testing.B) {
	wsTokenizer := character.NewCharacterTokenizer(notSpace)
	b.ResetTimer()
	for remaining := b.N; remaining > 0; remaining-- {
		wsTokenizer.Tokenize(sampleLargeInput)
	}
}
================================================
FILE: analysis/tokenmap/custom.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package tokenmap implements a generic TokenMap, often used in conjunction
// with filters to remove or process specific tokens.
//
// Its constructor takes the following arguments:
//
// "filename" (string): the path of a file listing the tokens. Each line may
// contain one or more whitespace separated tokens, followed by an optional
// comment starting with a "#" or "|" character.
//
// "tokens" ([]interface{}): if "filename" is not specified, tokens can be
// passed directly as a sequence of strings wrapped in a []interface{}.
package tokenmap
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// Name is the name under which GenericTokenMapConstructor is registered with
// the registry.
const Name = "custom"
// GenericTokenMapConstructor builds a TokenMap from config.
//
// If config["filename"] is a string, the map is loaded from that file and
// returned together with any load error. Otherwise, if config["tokens"] is a
// []interface{}, every element that is a string is added to the map and
// elements of any other type are skipped. If neither key holds a value of the
// expected type, an error is returned. The cache argument is not used.
func GenericTokenMapConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.TokenMap, error) {
	tokenMap := analysis.NewTokenMap()

	// a file name takes precedence over an inline word list
	if path, isString := config["filename"].(string); isString {
		loadErr := tokenMap.LoadFile(path)
		return tokenMap, loadErr
	}

	list, isList := config["tokens"].([]interface{})
	if !isList {
		return nil, fmt.Errorf("must specify filename or list of tokens for token map")
	}
	for _, entry := range list {
		if word, isString := entry.(string); isString {
			tokenMap.AddToken(word)
		}
	}
	return tokenMap, nil
}
// init registers GenericTokenMapConstructor with the registry under Name.
// A failed registration panics.
func init() {
	if err := registry.RegisterTokenMap(Name, GenericTokenMapConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: analysis/tokenmap.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
"bufio"
"bytes"
"io"
"os"
"strings"
)
// TokenMap is a set of tokens, stored as a map from each token to true.
type TokenMap map[string]bool

// NewTokenMap returns an empty, non-nil TokenMap.
func NewTokenMap() TokenMap {
	return TokenMap{}
}

// LoadFile reads the whole file at filename and adds its tokens to t using
// LoadBytes. A read error is returned without modifying t.
func (t TokenMap) LoadFile(filename string) error {
	contents, err := os.ReadFile(filename)
	if err == nil {
		err = t.LoadBytes(contents)
	}
	return err
}

// LoadBytes adds the tokens found in data to t. The data is processed line by
// line with LoadLine; a final line without a trailing newline is processed
// as well.
func (t TokenMap) LoadBytes(data []byte) error {
	lines := bufio.NewReader(bytes.NewReader(data))
	for {
		line, err := lines.ReadString('\n')
		switch err {
		case nil:
			t.LoadLine(line)
		case io.EOF:
			// whatever was read before EOF is the last, unterminated line
			t.LoadLine(line)
			return nil
		default:
			return err
		}
	}
}

// LoadLine adds every whitespace-separated token of line to t. Anything from
// the first `#` or `|` character to the end of the line is treated as a
// comment and ignored.
func (t TokenMap) LoadLine(line string) {
	if commentAt := strings.IndexAny(line, "#|"); commentAt >= 0 {
		line = line[:commentAt]
	}
	for _, word := range strings.Fields(line) {
		t.AddToken(word)
	}
}

// AddToken adds token to the set.
func (t TokenMap) AddToken(token string) {
	t[token] = true
}
================================================
FILE: analysis/tokenmap_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
"reflect"
"testing"
)
// TestTokenMapLoadFile loads test_words.txt and checks that the resulting map
// holds exactly the expected tokens.
func TestTokenMapLoadFile(t *testing.T) {
	loaded := NewTokenMap()
	if err := loaded.LoadFile("test_words.txt"); err != nil {
		t.Fatal(err)
	}

	want := NewTokenMap()
	for _, word := range []string{
		"marty",
		"steve",
		"dustin",
		"siri",
		"multiple",
		"words",
		"with",
		"different",
		"whitespace",
	} {
		want.AddToken(word)
	}

	if !reflect.DeepEqual(loaded, want) {
		t.Errorf("expected %#v, got %#v", want, loaded)
	}
}
================================================
FILE: analysis/type.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
"fmt"
"time"
)
// A CharFilter transforms raw input bytes and returns the result.
// DefaultAnalyzer.Analyze applies its char filters, in order, before the
// input is tokenized.
type CharFilter interface {
Filter([]byte) []byte
}
// TokenType identifies the kind of a Token.
type TokenType int
// The known token types, numbered consecutively from zero. The numeric values
// follow declaration order, so new types should only be appended.
const (
AlphaNumeric TokenType = iota
Ideographic
Numeric
DateTime
Shingle
Single
Double
Boolean
IP
)
// Token represents one occurrence of a term at a particular location in a
// field.
type Token struct {
// Start specifies the byte offset of the beginning of the term in the
// field.
Start int `json:"start"`
// End specifies the byte offset of the end of the term in the field.
End int `json:"end"`
// Term holds the bytes of the term itself.
Term []byte `json:"term"`
// Position specifies the 1-based index of the token within the sequence
// of tokens produced for the field.
Position int `json:"position"`
// Type is the TokenType of the token.
Type TokenType `json:"type"`
// KeyWord presumably marks the token as a keyword that later filters
// should leave unmodified — confirm against the filters that read it.
KeyWord bool `json:"keyword"`
}
// String renders the token's offsets, position, term and numeric type on a
// single line.
func (t *Token) String() string {
	const layout = "Start: %d End: %d Position: %d Token: %s Type: %d"
	return fmt.Sprintf(layout, t.Start, t.End, t.Position, t.Term, t.Type)
}
// TokenStream is an ordered sequence of tokens.
type TokenStream []*Token
// A Tokenizer splits an input string into tokens, the usual behaviour being to
// map words to tokens. Tokenize receives the input bytes and returns the
// resulting token stream.
type Tokenizer interface {
Tokenize([]byte) TokenStream
}
// A TokenFilter adds, transforms or removes tokens from a token stream.
// Filter receives a stream and returns the stream to use in its place.
type TokenFilter interface {
Filter(TokenStream) TokenStream
}
// An Analyzer turns raw input bytes into a token stream.
type Analyzer interface {
Analyze([]byte) TokenStream
}
// DefaultAnalyzer is an analysis pipeline made of optional char filters, a
// tokenizer and optional token filters. Analyze calls Tokenizer
// unconditionally, so it must be set.
type DefaultAnalyzer struct {
// CharFilters are applied to the input, in order, before tokenization.
CharFilters []CharFilter
// Tokenizer splits the (filtered) input into tokens.
Tokenizer Tokenizer
// TokenFilters are applied to the token stream, in order.
TokenFilters []TokenFilter
}
// Analyze runs input through the pipeline: every char filter in order, then
// the tokenizer, then every token filter in order. The resulting token stream
// is returned.
func (a *DefaultAnalyzer) Analyze(input []byte) TokenStream {
	// ranging over a nil slice is a no-op, so no nil checks are needed
	filtered := input
	for _, charFilter := range a.CharFilters {
		filtered = charFilter.Filter(filtered)
	}

	stream := a.Tokenizer.Tokenize(filtered)
	for _, tokenFilter := range a.TokenFilters {
		stream = tokenFilter.Filter(stream)
	}
	return stream
}
// Errors describing date/time and timestamp parsing failures.
var (
	// ErrInvalidDateTime indicates that no layout could parse the input.
	ErrInvalidDateTime = fmt.Errorf("unable to parse datetime with any of the layouts")
	// ErrInvalidTimestampString indicates that a timestamp string could not
	// be parsed.
	ErrInvalidTimestampString = fmt.Errorf("unable to parse timestamp string")
	// ErrInvalidTimestampRange indicates that a timestamp is out of range.
	ErrInvalidTimestampRange = fmt.Errorf("timestamp out of range")
)
// A DateTimeParser parses a string into a time.Time.
// NOTE(review): the string result is presumably the layout that matched the
// input — confirm against the implementations.
type DateTimeParser interface {
ParseDateTime(string) (time.Time, string, error)
}
// SynonymSourceType is the type name used for synonym sources.
const SynonymSourceType = "synonym"
// SynonymSourceVisitor is a callback invoked with the name of a synonym source
// and the source itself; it may return an error.
type SynonymSourceVisitor func(name string, item SynonymSource) error
// A SynonymSource exposes an analyzer and a collection, both as strings.
// NOTE(review): presumably these are the name of the analyzer applied to the
// synonyms and the name of the collection holding them — confirm with callers.
type SynonymSource interface {
Analyzer() string
Collection() string
}
// A ByteArrayConverter converts raw bytes into a Go value, or reports an error
// when the bytes cannot be converted.
type ByteArrayConverter interface {
Convert([]byte) (interface{}, error)
}
================================================
FILE: analysis/util.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
"bytes"
"unicode/utf8"
)
// DeleteRune removes the rune at index pos from in and returns the shortened
// slice. The removal happens in place: the runes after pos are shifted down
// within in's backing array, so the caller's slice contents are modified.
// If pos is out of range (negative, or >= len(in)), in is returned unchanged.
func DeleteRune(in []rune, pos int) []rune {
	if pos < 0 || pos >= len(in) {
		return in
	}
	// the destination has enough capacity, so append shifts in place
	return append(in[:pos], in[pos+1:]...)
}
// InsertRune returns a newly allocated slice holding the runes of in with r
// inserted at index pos. The input slice is not modified. pos must be in the
// range [0, len(in)].
func InsertRune(in []rune, pos int, r rune) []rune {
	out := make([]rune, 0, len(in)+1)
	out = append(out, in[:pos]...) // runes before the insertion point
	out = append(out, r)           // the inserted rune
	out = append(out, in[pos:]...) // runes after the insertion point
	return out
}
// BuildTermFromRunesOptimistic builds a term from the provided runes and
// optimistically attempts to encode it into the provided buffer. If at any
// point the buffer turns out to be too small, a new buffer large enough for
// the worst case is allocated, the bytes written so far are copied over, and
// encoding continues in the new buffer.
//
// This should be used where the new term is frequently the same length as, or
// shorter than, the original term (in number of bytes). The returned slice
// aliases buf whenever buf was large enough.
//
// Runes that are not valid code points are encoded as utf8.RuneError.
func BuildTermFromRunesOptimistic(buf []byte, runes []rune) []byte {
	rv := buf
	used := 0
	for _, r := range runes {
		nextLen := utf8.RuneLen(r)
		if nextLen < 0 {
			// RuneLen reports -1 for an invalid rune, but EncodeRune will
			// still write utf8.RuneError; reserve room for that instead
			nextLen = utf8.RuneLen(utf8.RuneError)
		}
		if used+nextLen > len(rv) {
			// alloc new buf, sized for the worst case
			grown := make([]byte, len(runes)*utf8.UTFMax)
			// copy work we've already done
			copy(grown, rv[:used])
			rv = grown
		}
		used += utf8.EncodeRune(rv[used:], r)
	}
	return rv[:used]
}
// BuildTermFromRunes encodes runes as UTF-8 into a freshly allocated buffer
// sized for the worst case of utf8.UTFMax bytes per rune, and returns the
// encoded bytes.
func BuildTermFromRunes(runes []rune) []byte {
	scratch := make([]byte, utf8.UTFMax*len(runes))
	return BuildTermFromRunesOptimistic(scratch, runes)
}
// TruncateRunes decodes input as UTF-8, drops its last num runes and returns
// the remaining runes re-encoded into a new byte slice.
//
// num is clamped to the range [0, number of runes in input]: asking to drop
// more runes than exist yields an empty term, and a negative num drops
// nothing.
func TruncateRunes(input []byte, num int) []byte {
	runes := bytes.Runes(input)
	// clamp so the slice expression below can never go out of range
	if num < 0 {
		num = 0
	}
	if num > len(runes) {
		num = len(runes)
	}
	return BuildTermFromRunes(runes[:len(runes)-num])
}
// RunesEndsWith reports whether input ends with the runes of suffix. An empty
// suffix matches every input.
func RunesEndsWith(input []rune, suffix string) bool {
	want := []rune(suffix)
	// index in input where the suffix would have to start
	offset := len(input) - len(want)
	if offset < 0 {
		return false
	}
	for i, r := range want {
		if input[offset+i] != r {
			return false
		}
	}
	return true
}
================================================
FILE: analysis/util_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package analysis
import (
"reflect"
"testing"
)
// TestDeleteRune checks that DeleteRune removes the rune at the requested
// position.
func TestDeleteRune(t *testing.T) {
	cases := []struct {
		in     []rune
		delPos int
		out    []rune
	}{
		{in: []rune{'a', 'b', 'c'}, delPos: 1, out: []rune{'a', 'c'}},
	}

	for _, tc := range cases {
		if got := DeleteRune(tc.in, tc.delPos); !reflect.DeepEqual(got, tc.out) {
			t.Errorf("expected %#v, got %#v", tc.out, got)
		}
	}
}
// TestInsertRune checks insertion in the middle, at the start and at the end
// of a rune slice.
func TestInsertRune(t *testing.T) {
	cases := []struct {
		in      []rune
		insPos  int
		insRune rune
		out     []rune
	}{
		{in: []rune{'a', 'b', 'c'}, insPos: 1, insRune: 'x', out: []rune{'a', 'x', 'b', 'c'}},
		{in: []rune{'a', 'b', 'c'}, insPos: 0, insRune: 'x', out: []rune{'x', 'a', 'b', 'c'}},
		{in: []rune{'a', 'b', 'c'}, insPos: 3, insRune: 'x', out: []rune{'a', 'b', 'c', 'x'}},
	}

	for _, tc := range cases {
		if got := InsertRune(tc.in, tc.insPos, tc.insRune); !reflect.DeepEqual(got, tc.out) {
			t.Errorf("expected %#v, got %#v", tc.out, got)
		}
	}
}
// TestBuildTermFromRunes checks that the bytes produced by BuildTermFromRunes
// decode back to the original runes.
func TestBuildTermFromRunes(t *testing.T) {
	inputs := [][]rune{
		{'a', 'b', 'c'},
		{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
	}

	for _, in := range inputs {
		encoded := BuildTermFromRunes(in)
		if decoded := []rune(string(encoded)); !reflect.DeepEqual(decoded, in) {
			t.Errorf("expected %v to convert back to %v", encoded, in)
		}
	}
}
// TestBuildTermFromRunesOptimistic checks that the encoded bytes decode back
// to the original runes, both when the provided buffer is large enough and
// when it is too small.
func TestBuildTermFromRunesOptimistic(t *testing.T) {
	cases := []struct {
		buf []byte
		in  []rune
	}{
		// buffer exactly fits
		{buf: []byte("abc"), in: []rune{'a', 'b', 'c'}},
		{buf: []byte("こんにちは世界"), in: []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'}},
		// same, but don't give enough buffer
		{buf: []byte("ab"), in: []rune{'a', 'b', 'c'}},
		{buf: []byte("こ"), in: []rune{'こ', 'ん', 'に', 'ち', 'は', '世', '界'}},
	}

	for _, tc := range cases {
		encoded := BuildTermFromRunesOptimistic(tc.buf, tc.in)
		if decoded := []rune(string(encoded)); !reflect.DeepEqual(decoded, tc.in) {
			t.Errorf("expected %v to convert back to %v", encoded, tc.in)
		}
	}
}
// BenchmarkBuildTermFromRunes measures encoding one ASCII and one multi-byte
// rune sequence per iteration.
func BenchmarkBuildTermFromRunes(b *testing.B) {
	samples := [][]rune{
		{'a', 'b', 'c'},
		{'こ', 'ん', 'に', 'ち', 'は', '世', '界'},
	}
	for n := 0; n < b.N; n++ {
		for _, runes := range samples {
			BuildTermFromRunes(runes)
		}
	}
}
================================================
FILE: builder.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// builderImpl pairs an index builder with the mapping used to turn
// application data into documents.
type builderImpl struct {
// b receives the mapped documents.
b index.IndexBuilder
// m maps input data onto documents.
m mapping.IndexMapping
}
// Index maps data into a new document identified by id and hands that
// document to the underlying index builder. An empty id is rejected with
// ErrorEmptyID; mapping and builder errors are returned as-is.
func (b *builderImpl) Index(id string, data interface{}) error {
	if id == "" {
		return ErrorEmptyID
	}

	doc := document.NewDocument(id)
	if mapErr := b.m.MapDocument(doc, data); mapErr != nil {
		return mapErr
	}
	return b.b.Index(doc)
}
// Close closes the underlying index builder and returns its error, if any.
func (b *builderImpl) Close() error {
return b.b.Close()
}
// newBuilder creates a Builder that writes a scorch index at path using the
// given mapping.
//
// It fails if path is empty or the mapping does not validate. The index
// metadata is saved at path before the scorch builder is created. config
// carries options for the builder; a nil config is replaced with an empty map.
//
// Note that a non-nil config map passed by the caller is modified in place:
// the "internal" and "path" keys are set on it.
func newBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
if path == "" {
return nil, fmt.Errorf("builder requires path")
}
err := mapping.Validate()
if err != nil {
return nil, err
}
if config == nil {
config = map[string]interface{}{}
}
// the builder does not have an API to interact with internal storage
// however we can pass k/v pairs through the config
mappingBytes, err := util.MarshalJSON(mapping)
if err != nil {
return nil, err
}
config["internal"] = map[string][]byte{
string(util.MappingInternalKey): mappingBytes,
}
// do not use real config, as these are options for the builder,
// not the resulting index
meta := newIndexMeta(scorch.Name, scorch.Name, map[string]interface{}{})
err = meta.Save(path)
if err != nil {
return nil, err
}
// the builder writes into the store location derived from path
config["path"] = indexStorePath(path)
b, err := scorch.NewBuilder(config)
if err != nil {
return nil, err
}
rv := &builderImpl{
b: b,
m: mapping,
}
return rv, nil
}
================================================
FILE: builder_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"os"
"testing"
)
// TestBuilder builds an index of ten documents in a temporary directory using
// a batch size and merge max of 2, closes the builder, then opens the result
// as a regular index and checks both the document count and the number of
// hits of a term query for "hello" on the "name" field.
func TestBuilder(t *testing.T) {
tmpDir, err := os.MkdirTemp("", "bleve-scorch-builder-test")
if err != nil {
t.Fatal(err)
}
// remove the temporary index when the test finishes
defer func() {
err = os.RemoveAll(tmpDir)
if err != nil {
t.Fatalf("error cleaning up test index")
}
}()
conf := map[string]interface{}{
"batchSize": 2,
"mergeMax": 2,
}
b, err := NewBuilder(tmpDir, NewIndexMapping(), conf)
if err != nil {
t.Fatal(err)
}
for i := 0; i < 10; i++ {
doc := map[string]interface{}{
"name": "hello",
}
err = b.Index(fmt.Sprintf("%d", i), doc)
if err != nil {
t.Fatal(err)
}
}
err = b.Close()
if err != nil {
t.Fatal(err)
}
// the built index must be usable through the regular Open API
idx, err := Open(tmpDir)
if err != nil {
t.Fatalf("error opening index: %v", err)
}
defer func() {
err = idx.Close()
if err != nil {
t.Errorf("error closing index: %v", err)
}
}()
docCount, err := idx.DocCount()
if err != nil {
t.Errorf("error checking doc count: %v", err)
}
if docCount != 10 {
t.Errorf("expected doc count to be 10, got %d", docCount)
}
q := NewTermQuery("hello")
q.SetField("name")
req := NewSearchRequest(q)
res, err := idx.Search(req)
if err != nil {
t.Errorf("error searching index: %v", err)
}
if res.Total != 10 {
t.Errorf("expected 10 search hits, got %d", res.Total)
}
}
================================================
FILE: cmd/bleve/cmd/bulk.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
	"bufio"
	"encoding/json"
	"fmt"
	"io"
	"math/rand"
	"os"

	"github.com/spf13/cobra"
)
// batchSize holds the value of the bulk command's --batch flag; bulkCmd
// executes the pending batch whenever the running document count is a
// multiple of it.
var batchSize int
// bulkCmd represents the bulk command.
//
// Every line of each data file becomes one document, indexed under a random
// five-letter ID. When parseJSON is set the line is decoded as JSON first;
// otherwise the raw bytes of the line are indexed. Documents are collected in
// a batch that is executed whenever the running document count is a multiple
// of batchSize, and once more at the end of every file.
//
// NOTE(review): five random letters give roughly 380 million distinct IDs, so
// large loads may see ID collisions that silently overwrite documents.
var bulkCmd = &cobra.Command{
	Use:   "bulk [index path] [data paths ...]",
	Short: "bulk loads from newline delimited JSON files",
	Long:  `The bulk command will perform batch loading of documents in one or more newline delimited JSON files.`,
	Annotations: map[string]string{
		canMutateBleveIndex: "true",
	},
	RunE: func(cmd *cobra.Command, args []string) error {
		if len(args) < 2 {
			return fmt.Errorf("must specify at least one path")
		}
		// guard the modulo below against a zero or negative --batch value
		if batchSize < 1 {
			return fmt.Errorf("batch size must be at least 1, got %d", batchSize)
		}

		indexed := 0
		batch := idx.NewBatch()

		// flush executes the pending batch and starts a fresh one, so that
		// documents are never submitted to the index twice
		flush := func() error {
			if batchErr := idx.Batch(batch); batchErr != nil {
				return batchErr
			}
			batch = idx.NewBatch()
			return nil
		}

		// loadFile indexes every line of the file at path and flushes the
		// batch at the end; the file is closed on every return path
		loadFile := func(path string) (err error) {
			file, err := os.Open(path)
			if err != nil {
				return err
			}
			defer func() {
				if closeErr := file.Close(); closeErr != nil && err == nil {
					err = closeErr
				}
			}()

			fmt.Printf("Indexing: %s\n", file.Name())
			r := bufio.NewReader(file)
			for {
				if indexed%batchSize == 0 {
					fmt.Printf("Indexing batch (%d docs)...\n", indexed)
					if flushErr := flush(); flushErr != nil {
						return flushErr
					}
				}

				// at EOF ReadBytes still returns a final unterminated line,
				// which must be indexed; any other error is fatal
				line, readErr := r.ReadBytes('\n')
				if readErr != nil && readErr != io.EOF {
					return readErr
				}
				if len(line) == 0 {
					break
				}

				var doc interface{} = line
				if parseJSON {
					if jsonErr := json.Unmarshal(line, &doc); jsonErr != nil {
						return fmt.Errorf("error parsing JSON: %v", jsonErr)
					}
				}
				if indexErr := batch.Index(randomString(5), doc); indexErr != nil {
					return indexErr
				}
				indexed++
			}
			return flush()
		}

		for _, path := range args[1:] {
			if err := loadFile(path); err != nil {
				return err
			}
		}
		return nil
	},
}
// letters is the alphabet randomString draws from: the lower- and upper-case
// ASCII letters.
var letters = []rune("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ")

// randomString returns a string of n letters, each picked from letters using
// math/rand.
func randomString(n int) string {
	picked := make([]rune, 0, n)
	for len(picked) < n {
		picked = append(picked, letters[rand.Intn(len(letters))])
	}
	return string(picked)
}
// init adds the bulk command to RootCmd and declares its flags:
// --batch/-b (default 1000) and --json/-j (default true).
func init() {
RootCmd.AddCommand(bulkCmd)
bulkCmd.Flags().IntVarP(&batchSize, "batch", "b", 1000, "Batch size for loading.")
bulkCmd.Flags().BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON.")
}
================================================
FILE: cmd/bleve/cmd/check.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"log"
"github.com/blevesearch/bleve/v2"
"github.com/spf13/cobra"
)
// checkFieldName holds the check command's --field flag; when empty, every
// field of the index is checked.
var checkFieldName string
// checkCount holds the check command's --count flag: the maximum number of
// terms tested per field. A value of zero or less removes the limit.
var checkCount int
// checkCmd represents the check command.
//
// It checks either the field named by checkFieldName or, when that is empty,
// every field of the index. Any failure while checking a field is returned to
// cobra, and an error is returned if any problems were found.
var checkCmd = &cobra.Command{
	Use:   "check [index path]",
	Short: "checks the contents of the index",
	Long:  `The check command will perform consistency checks on the index.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		var fieldNames []string
		var err error
		if checkFieldName == "" {
			fieldNames, err = idx.Fields()
			if err != nil {
				return err
			}
		} else {
			fieldNames = []string{checkFieldName}
		}
		fmt.Printf("checking fields: %v\n", fieldNames)

		totalProblems := 0
		for _, fieldName := range fieldNames {
			fmt.Printf("checking field: '%s'\n", fieldName)
			problems, err := checkField(idx, fieldName)
			if err != nil {
				// return instead of exiting so cobra can report the error
				// and run its post-run hooks
				return err
			}
			totalProblems += problems
		}

		if totalProblems != 0 {
			return fmt.Errorf("found %d total problems", totalProblems)
		}
		return nil
	},
}
// checkField compares, for the terms of fieldName's dictionary, the count
// recorded in the dictionary with the number of hits of a term query for that
// term. When checkCount is positive, at most checkCount terms are tested.
// It returns the number of mismatches found.
func checkField(index bleve.Index, fieldName string) (int, error) {
	dict, err := getDictionary(index, fieldName)
	if err != nil {
		return 0, err
	}
	fmt.Printf("field contains %d terms\n", len(dict))

	var tested, problems int
	for term, expected := range dict {
		fmt.Printf("checked %d terms\r", tested)
		if checkCount > 0 && tested >= checkCount {
			break
		}

		// only the hit total is needed, so request no documents
		query := bleve.NewTermQuery(term)
		query.SetField(fieldName)
		request := bleve.NewSearchRequest(query)
		request.Size = 0

		result, err := index.Search(request)
		if err != nil {
			return 0, err
		}
		if result.Total != expected {
			fmt.Printf("unexpected mismatch for term '%s', dictionary %d, search hits %d\n", term, expected, result.Total)
			problems++
		}
		tested++
	}

	fmt.Printf("done checking %d terms, found %d problems\n", tested, problems)
	return problems, nil
}
// getDictionary reads the complete term dictionary of field and returns it as
// a map from term to the count recorded for it.
//
// Every failure is returned to the caller. The index reader and the field
// dictionary are closed before returning; a failure to close either one is
// logged, since the dictionary has already been read at that point.
func getDictionary(index bleve.Index, field string) (map[string]uint64, error) {
	i, err := index.Advanced()
	if err != nil {
		return nil, err
	}
	r, err := i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		if closeErr := r.Close(); closeErr != nil {
			log.Printf("error closing index reader: %v", closeErr)
		}
	}()

	d, err := r.FieldDict(field)
	if err != nil {
		return nil, err
	}
	defer func() {
		if closeErr := d.Close(); closeErr != nil {
			log.Printf("error closing field dictionary: %v", closeErr)
		}
	}()

	rv := make(map[string]uint64)
	de, err := d.Next()
	for err == nil && de != nil {
		rv[de.Term] = de.Count
		de, err = d.Next()
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
// init adds the check command to RootCmd and declares its flags:
// --field/-f (default empty) and --count/-c (default 100).
func init() {
RootCmd.AddCommand(checkCmd)
checkCmd.Flags().StringVarP(&checkFieldName, "field", "f", "", "Restrict check to the specified field name.")
checkCmd.Flags().IntVarP(&checkCount, "count", "c", 100, "Check this many terms.")
}
================================================
FILE: cmd/bleve/cmd/count.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/spf13/cobra"
)
// countCmd represents the count command: it prints the number of documents
// in the index on a line of its own.
var countCmd = &cobra.Command{
	Use:   "count [index path]",
	Short: "counts the number documents in the index",
	Long:  `The count command will count the number of documents in the index.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		docCount, err := idx.DocCount()
		if err == nil {
			fmt.Println(docCount)
			return nil
		}
		return fmt.Errorf("error counting docs in index: %v", err)
	},
}
// init adds the count command to RootCmd.
func init() {
RootCmd.AddCommand(countCmd)
}
================================================
FILE: cmd/bleve/cmd/create.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/json"
"fmt"
"os"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/spf13/cobra"
)
// mappingPath, indexType and storeType hold the create command's --mapping,
// --index and --store flags respectively.
var mappingPath, indexType, storeType string
// createCmd represents the create command: it builds the index mapping and
// creates a new index at the path given as first argument, using the
// configured index and store types.
var createCmd = &cobra.Command{
	Use:   "create [index path]",
	Short: "creates a new index",
	Long:  `The create command will create a new empty index.`,
	Annotations: map[string]string{
		canMutateBleveIndex: "true",
	},
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		// override RootCmd version which opens existing index
		if len(args) >= 1 {
			return nil
		}
		return fmt.Errorf("must specify path to index")
	},
	RunE: func(cmd *cobra.Command, args []string) error {
		var indexMapping mapping.IndexMapping
		indexMapping, err := buildMapping()
		if err != nil {
			return fmt.Errorf("error building mapping: %v", err)
		}

		// assign to the package-level idx:
		// the inherited Post action will close the index
		idx, err = bleve.NewUsing(args[0], indexMapping, indexType, storeType, nil)
		if err != nil {
			return fmt.Errorf("error creating index: %v", err)
		}
		return nil
	},
}
// buildMapping returns the index mapping for a new index. It starts from a new
// default mapping; when mappingPath is set, the JSON file at that path is
// decoded into it.
func buildMapping() (mapping.IndexMapping, error) {
	indexMapping := mapping.NewIndexMapping()
	if mappingPath == "" {
		return indexMapping, nil
	}

	raw, err := os.ReadFile(mappingPath)
	if err != nil {
		return nil, err
	}
	if err := json.Unmarshal(raw, &indexMapping); err != nil {
		return nil, err
	}
	return indexMapping, nil
}
// init adds the create command to RootCmd and declares its flags:
// --mapping/-m, --store/-s and --index/-i. The store and index flags default
// to the values in bleve.Config.
func init() {
RootCmd.AddCommand(createCmd)
createCmd.Flags().StringVarP(&mappingPath, "mapping", "m", "", "Path to a file containing a JSON representation of an index mapping to use.")
createCmd.Flags().StringVarP(&storeType, "store", "s", bleve.Config.DefaultKVStore, "The bleve storage type to use.")
createCmd.Flags().StringVarP(&indexType, "index", "i", bleve.Config.DefaultIndexType, "The bleve index type to use.")
}
================================================
FILE: cmd/bleve/cmd/dictionary.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/spf13/cobra"
)
// dictionaryCmd represents the dictionary command: it prints every term of
// the dictionary of the field named by the second argument, together with
// its count, one "term - count" pair per line.
//
// The index reader and the field dictionary are closed before the command
// returns; a close failure is reported only if nothing else failed first.
var dictionaryCmd = &cobra.Command{
	Use:   "dictionary [index path] [field name]",
	Short: "prints the term dictionary for the specified field in the index",
	Long:  `The dictionary command will print the term dictionary for the specified field.`,
	RunE: func(cmd *cobra.Command, args []string) (err error) {
		if len(args) < 2 {
			return fmt.Errorf("must specify field")
		}
		i, err := idx.Advanced()
		if err != nil {
			return fmt.Errorf("error getting index: %v", err)
		}
		r, err := i.Reader()
		if err != nil {
			return fmt.Errorf("error getting index reader: %v", err)
		}
		defer func() {
			if closeErr := r.Close(); closeErr != nil && err == nil {
				err = fmt.Errorf("error closing index reader: %v", closeErr)
			}
		}()

		d, err := r.FieldDict(args[1])
		if err != nil {
			return fmt.Errorf("error getting field dictionary: %v", err)
		}
		defer func() {
			if closeErr := d.Close(); closeErr != nil && err == nil {
				err = fmt.Errorf("error closing field dictionary: %v", closeErr)
			}
		}()

		de, err := d.Next()
		for err == nil && de != nil {
			fmt.Printf("%s - %d\n", de.Term, de.Count)
			de, err = d.Next()
		}
		if err != nil {
			return fmt.Errorf("error iterating dictionary: %v", err)
		}
		return nil
	},
}
// init adds the dictionary command to RootCmd.
func init() {
RootCmd.AddCommand(dictionaryCmd)
}
================================================
FILE: cmd/bleve/cmd/dump.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/spf13/cobra"
)
// docID is a package-level string declared next to the dump command.
// NOTE(review): nothing in dump.go reads or writes it — confirm it is still needed.
var docID string
// dumpCmd represents the dump command. It prints every row delivered by the
// upsidedown reader's DumpAll channel: first the row in its default format,
// then its key and value in hex.
var dumpCmd = &cobra.Command{
	Use:   "dump [index path]",
	Short: "dumps the contents of the index",
	Long:  `The dump command will dump (possibly a section of) the index.`,
	RunE: func(cmd *cobra.Command, args []string) (err error) {
		i, err := idx.Advanced()
		if err != nil {
			return fmt.Errorf("error getting index: %v", err)
		}
		r, err := i.Reader()
		if err != nil {
			return fmt.Errorf("error getting index reader: %v", err)
		}
		// Release the reader on every return path. A close failure is
		// reported only when no earlier error is already being returned.
		defer func() {
			if cerr := r.Close(); cerr != nil && err == nil {
				err = fmt.Errorf("error closing index reader: %v", cerr)
			}
		}()

		// Dumping relies on methods of the concrete upsidedown reader.
		upsideDownReader, ok := r.(*upsidedown.IndexReader)
		if !ok {
			return fmt.Errorf("dump is only supported by index type upsidedown")
		}

		// The channel carries either an error or a row; other values are ignored.
		for item := range upsideDownReader.DumpAll() {
			switch v := item.(type) {
			case error:
				return fmt.Errorf("error dumping: %v", v)
			case upsidedown.UpsideDownCouchRow:
				fmt.Printf("%v\n", v)
				fmt.Printf("Key: % -100x\nValue: % -100x\n\n", v.Key(), v.Value())
			}
		}
		return nil
	},
}
// init registers dumpCmd as a sub-command of RootCmd.
func init() {
RootCmd.AddCommand(dumpCmd)
}
================================================
FILE: cmd/bleve/cmd/dumpDoc.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/spf13/cobra"
)
// dumpDocCmd represents the dumpDoc command. It prints every row delivered
// by the upsidedown reader's DumpDoc channel for the given document ID:
// first the row in its default format, then its key and value in hex.
var dumpDocCmd = &cobra.Command{
	Use:   "doc [index path] [doc id]",
	Short: "dump only the rows relating to this doc ID",
	Long:  `The doc sub-command of dump will only dump the rows relating to this doc ID.`,
	RunE: func(cmd *cobra.Command, args []string) (err error) {
		// args[0] is the index path; args[1] is the document ID.
		if len(args) < 2 {
			return fmt.Errorf("must specify docid")
		}
		i, err := idx.Advanced()
		if err != nil {
			return fmt.Errorf("error getting index: %v", err)
		}
		r, err := i.Reader()
		if err != nil {
			return fmt.Errorf("error getting index reader: %v", err)
		}
		// Release the reader on every return path. A close failure is
		// reported only when no earlier error is already being returned.
		defer func() {
			if cerr := r.Close(); cerr != nil && err == nil {
				err = fmt.Errorf("error closing index reader: %v", cerr)
			}
		}()

		// Dumping relies on methods of the concrete upsidedown reader.
		upsideDownReader, ok := r.(*upsidedown.IndexReader)
		if !ok {
			return fmt.Errorf("dump doc is only supported by index type upsidedown")
		}

		// The channel carries either an error or a row; other values are ignored.
		for item := range upsideDownReader.DumpDoc(args[1]) {
			switch v := item.(type) {
			case error:
				return fmt.Errorf("error dumping: %v", v)
			case upsidedown.UpsideDownCouchRow:
				fmt.Printf("%v\n", v)
				fmt.Printf("Key: % -100x\nValue: % -100x\n\n", v.Key(), v.Value())
			}
		}
		return nil
	},
}
// init registers dumpDocCmd as a sub-command of dumpCmd.
func init() {
dumpCmd.AddCommand(dumpDocCmd)
}
================================================
FILE: cmd/bleve/cmd/dumpFields.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/spf13/cobra"
)
// dumpFieldsCmd represents the dumpFields command. It prints every row
// delivered by the upsidedown reader's DumpFields channel: first the row in
// its default format, then its key and value in hex.
var dumpFieldsCmd = &cobra.Command{
	Use:   "fields [index path]",
	Short: "dump only the field rows",
	Long:  `The fields sub-command of dump will only dump the field rows.`,
	RunE: func(cmd *cobra.Command, args []string) (err error) {
		i, err := idx.Advanced()
		if err != nil {
			return fmt.Errorf("error getting index: %v", err)
		}
		r, err := i.Reader()
		if err != nil {
			return fmt.Errorf("error getting index reader: %v", err)
		}
		// Release the reader on every return path. A close failure is
		// reported only when no earlier error is already being returned.
		defer func() {
			if cerr := r.Close(); cerr != nil && err == nil {
				err = fmt.Errorf("error closing index reader: %v", cerr)
			}
		}()

		// Dumping relies on methods of the concrete upsidedown reader.
		upsideDownReader, ok := r.(*upsidedown.IndexReader)
		if !ok {
			return fmt.Errorf("dump fields is only supported by index type upsidedown")
		}

		// The channel carries either an error or a row; other values are ignored.
		for item := range upsideDownReader.DumpFields() {
			switch v := item.(type) {
			case error:
				return fmt.Errorf("error dumping: %v", v)
			case upsidedown.UpsideDownCouchRow:
				fmt.Printf("%v\n", v)
				fmt.Printf("Key: % -100x\nValue: % -100x\n\n", v.Key(), v.Value())
			}
		}
		return nil
	},
}
// init registers dumpFieldsCmd as a sub-command of dumpCmd.
func init() {
dumpCmd.AddCommand(dumpFieldsCmd)
}
================================================
FILE: cmd/bleve/cmd/fields.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"github.com/spf13/cobra"
)
// fieldsCmd represents the fields command. It prints one "position - name"
// line for every field name the index reader reports.
var fieldsCmd = &cobra.Command{
	Use:   "fields [index path]",
	Short: "lists the fields in this index",
	Long:  `The fields command will list the fields used in this index.`,
	RunE: func(cmd *cobra.Command, args []string) (err error) {
		i, err := idx.Advanced()
		if err != nil {
			return fmt.Errorf("error getting index: %v", err)
		}
		r, err := i.Reader()
		if err != nil {
			return fmt.Errorf("error getting index reader: %v", err)
		}
		// Release the reader on every return path. A close failure is
		// reported only when no earlier error is already being returned.
		defer func() {
			if cerr := r.Close(); cerr != nil && err == nil {
				err = fmt.Errorf("error closing index reader: %v", cerr)
			}
		}()

		names, err := r.Fields()
		if err != nil {
			return fmt.Errorf("error getting fields: %v", err)
		}
		// The number printed is the position in the returned slice.
		for n, name := range names {
			fmt.Printf("%d - %s\n", n, name)
		}
		return nil
	},
}
// init registers fieldsCmd as a sub-command of RootCmd.
func init() {
RootCmd.AddCommand(fieldsCmd)
}
================================================
FILE: cmd/bleve/cmd/index.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/json"
"fmt"
"log"
"os"
"path/filepath"
"github.com/spf13/cobra"
)
// keepDir, keepExt and parseJSON hold the values of the index command's
// --keepDir, --keepExt and --json flags.
var keepDir, keepExt, parseJSON bool
// indexCmd represents the index command. Every file found under the given
// data paths is indexed under a document ID derived from its file name.
var indexCmd = &cobra.Command{
	Use:   "index [index path] [data paths ...]",
	Short: "adds the files to the index",
	Long:  `The index command adds the specified files to the index.`,
	Annotations: map[string]string{
		canMutateBleveIndex: "true",
	},
	RunE: func(cmd *cobra.Command, args []string) error {
		// args[0] is the index path; everything after it is a data path.
		if len(args) < 2 {
			return fmt.Errorf("must specify at least one path")
		}

		for f := range handleArgs(args[1:]) {
			// Derive the document ID from the file name, dropping the
			// directory and the extension unless the flags ask to keep them.
			id := f.filename
			if !keepDir {
				_, id = filepath.Split(id)
			}
			if !keepExt {
				id = id[:len(id)-len(filepath.Ext(id))]
			}

			// Without --json the raw bytes are indexed as they are.
			var doc interface{} = f.contents
			if parseJSON {
				if err := json.Unmarshal(f.contents, &doc); err != nil {
					return fmt.Errorf("error parsing JSON: %v", err)
				}
			}

			fmt.Printf("Indexing: %s\n", id)
			if err := idx.Index(id, doc); err != nil {
				return fmt.Errorf("error indexing: %v", err)
			}
		}
		return nil
	},
}
// file is one regular file discovered by getAllFiles.
type file struct {
	filename string // path of the file as reported by filepath.Walk
	contents []byte // complete contents of the file
}

// handleArgs starts walking every path in args on a background goroutine and
// returns the channel on which the discovered files are delivered. The
// channel is closed once every path has been walked.
func handleArgs(args []string) chan file {
	rv := make(chan file)
	go getAllFiles(args, rv)
	return rv
}

// getAllFiles walks each path in args, sends every non-directory entry it
// finds on rv together with its contents, and closes rv when done.
//
// The filename sent is the walked path, directory included, so that the
// consumer can decide whether to keep or strip the directory. Sending only
// the base name here would make that choice impossible.
//
// Any walk or read error terminates the process via log.Fatal.
func getAllFiles(args []string, rv chan file) {
	for _, arg := range args {
		root := filepath.Clean(arg)
		err := filepath.Walk(root, func(path string, finfo os.FileInfo, err error) error {
			if err != nil {
				log.Print(err)
				return err
			}
			// Directories are descended into, never sent.
			if finfo.IsDir() {
				return nil
			}

			contents, err := os.ReadFile(path)
			if err != nil {
				log.Fatal(err)
			}
			rv <- file{
				filename: path,
				contents: contents,
			}
			return nil
		})
		if err != nil {
			log.Fatal(err)
		}
	}
	close(rv)
}
// init registers indexCmd with RootCmd and declares its flags.
func init() {
	RootCmd.AddCommand(indexCmd)

	flags := indexCmd.Flags()
	flags.BoolVarP(&keepDir, "keepDir", "d", false, "Keep the directory in the document id.")
	flags.BoolVarP(&keepExt, "keepExt", "x", false, "Keep the extension in the document id.")
	flags.BoolVarP(&parseJSON, "json", "j", true, "Parse the contents as JSON.")
}
================================================
FILE: cmd/bleve/cmd/mapping.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"encoding/json"
"fmt"
"log"
"github.com/spf13/cobra"
)
// mappingCmd represents the mapping command. It writes the index mapping to
// standard output as indented JSON and terminates the process via log.Fatal
// if the mapping cannot be marshalled.
var mappingCmd = &cobra.Command{
	Use:   "mapping [index path]",
	Short: "prints the mapping used for this index",
	Long:  `The mapping command prints a JSON representation of the mapping used for this index.`,
	Run: func(cmd *cobra.Command, args []string) {
		out, err := json.MarshalIndent(idx.Mapping(), "", " ")
		if err != nil {
			log.Fatal(err)
		}
		fmt.Printf("%s\n", out)
	},
}
// init registers mappingCmd as a sub-command of RootCmd.
func init() {
RootCmd.AddCommand(mappingCmd)
}
================================================
FILE: cmd/bleve/cmd/query.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"strings"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/spf13/cobra"
)
// limit, skip and repeat hold the values of the query command's --limit,
// --skip and --repeat flags.
var limit, skip, repeat int
// explain, highlight and fields hold the values of the query command's
// --explain, --highlight and --fields flags.
var explain, highlight, fields bool
// qtype, qfield and sortby hold the values of the query command's --type,
// --field and --sort-by flags.
var qtype, qfield, sortby string
// queryCmd represents the query command. It builds a query from the
// positional arguments, runs it against the index `repeat` times and prints
// each search result.
var queryCmd = &cobra.Command{
	Use:   "query [index path] [query]",
	Short: "queries the index",
	Long:  `The query command will execute a query against the index.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		if len(args) < 2 {
			return fmt.Errorf("must specify query")
		}

		q := buildQuery(args)
		for run := 0; run < repeat; run++ {
			req := bleve.NewSearchRequestOptions(q, limit, skip, explain)
			if highlight {
				req.Highlight = bleve.NewHighlightWithStyle("ansi")
			}
			if fields {
				// "*" asks for every stored field.
				req.Fields = []string{"*"}
			}
			if sortby != "" {
				// Split yields a one-element slice when there is no comma, so
				// single and comma-separated sort keys share one path.
				req.SortBy(strings.Split(sortby, ","))
			}

			res, err := idx.Search(req)
			if err != nil {
				return fmt.Errorf("error running query: %v", err)
			}
			fmt.Println(res)
		}
		return nil
	},
}
// buildQuery joins args[1:] with single spaces and turns the result into a
// query of the kind selected by qtype: "prefix" and "term" build the
// corresponding query (restricted to qfield when it is non-empty); any other
// value builds a query string query.
func buildQuery(args []string) query.Query {
	text := strings.Join(args[1:], " ")

	switch qtype {
	case "prefix":
		prefixQuery := bleve.NewPrefixQuery(text)
		if qfield != "" {
			prefixQuery.SetField(qfield)
		}
		return prefixQuery
	case "term":
		termQuery := bleve.NewTermQuery(text)
		if qfield != "" {
			termQuery.SetField(qfield)
		}
		return termQuery
	}

	// build a search with the provided parameters
	return bleve.NewQueryStringQuery(text)
}
// init registers queryCmd with RootCmd and declares its flags.
func init() {
	RootCmd.AddCommand(queryCmd)

	flags := queryCmd.Flags()
	flags.IntVarP(&repeat, "repeat", "r", 1, "Repeat the query this many times.")
	flags.IntVarP(&limit, "limit", "l", 10, "Limit number of results returned.")
	flags.IntVarP(&skip, "skip", "s", 0, "Skip the first N results.")
	flags.BoolVarP(&explain, "explain", "x", false, "Explain the result scoring.")
	flags.BoolVar(&highlight, "highlight", true, "Highlight matching text in results.")
	flags.BoolVar(&fields, "fields", false, "Load stored fields.")
	flags.StringVarP(&qtype, "type", "t", "query_string", "Type of query to run.")
	flags.StringVarP(&qfield, "field", "f", "", "Restrict query to field, not applicable to query_string queries.")
	flags.StringVarP(&sortby, "sort-by", "b", "", "Sort by field.")
}
================================================
FILE: cmd/bleve/cmd/registry.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"sort"
"github.com/blevesearch/bleve/v2/registry"
"github.com/spf13/cobra"
)
// registryCmd represents the registry command. It prints, for each kind of
// bleve component, the type names and instance names the registry reports.
// Both persistent hooks are overridden with no-ops, so the root command's
// hooks do not run for this command.
var registryCmd = &cobra.Command{
	Use:   "registry",
	Short: "registry lists the bleve components compiled into this executable",
	Long:  `The registry command will list all of the bleve components compiled into this executable.`,
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		// override to do nothing
		return nil
	},
	PersistentPostRunE: func(cmd *cobra.Command, args []string) error {
		// override to do nothing
		return nil
	},
	Run: func(cmd *cobra.Command, args []string) {
		// Component kinds in the order they are printed.
		kinds := []struct {
			label string
			list  func() ([]string, []string)
		}{
			{"Char Filter", registry.CharFilterTypesAndInstances},
			{"Tokenizer", registry.TokenizerTypesAndInstances},
			{"Token Map", registry.TokenMapTypesAndInstances},
			{"Token Filter", registry.TokenFilterTypesAndInstances},
			{"Analyzer", registry.AnalyzerTypesAndInstances},
			{"Date Time Parser", registry.DateTimeParserTypesAndInstances},
			{"KV Store", registry.KVStoreTypesAndInstances},
			{"Fragment Formatter", registry.FragmentFormatterTypesAndInstances},
			{"Fragmenter", registry.FragmenterTypesAndInstances},
			{"Highlighter", registry.HighlighterTypesAndInstances},
		}
		for _, kind := range kinds {
			types, instances := kind.list()
			printType(kind.label, types, instances)
		}
	},
}
// printType sorts types and instances in place and writes them to standard
// output as two tab-indented lists, headed "<label> Types:" and
// "<label> Instances:", each followed by a blank line.
func printType(label string, types, instances []string) {
	// list prints one tab-indented name per line, then a blank line.
	list := func(names []string) {
		for n := range names {
			fmt.Printf("\t%s\n", names[n])
		}
		fmt.Println()
	}

	sort.Strings(types)
	sort.Strings(instances)

	fmt.Printf("%s Types:\n", label)
	list(types)
	fmt.Printf("%s Instances:\n", label)
	list(instances)
}
// init registers registryCmd as a sub-command of RootCmd.
func init() {
RootCmd.AddCommand(registryCmd)
}
================================================
FILE: cmd/bleve/cmd/root.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"fmt"
"os"
"strconv"
"github.com/blevesearch/bleve/v2"
"github.com/spf13/cobra"
)
// cfgFile is a package-level string.
// NOTE(review): nothing in root.go reads or writes it — confirm it is still needed.
var cfgFile string
// idx is the index opened by RootCmd's PersistentPreRunE and closed by its
// PersistentPostRunE.
var idx bleve.Index
// DefaultOpenReadOnly allows some distributions of this command to default
// to always opening the index read-only
var DefaultOpenReadOnly = false
// canMutateBleveIndex is the cobra annotation key that CanMutateBleveIndex
// looks up on a command.
const canMutateBleveIndex = "canMutateBleveIndex"
// CanMutateBleveIndex reports whether the command is annotated as capable of
// mutating the bleve index. It returns true only when the command carries
// the canMutateBleveIndex annotation and its value parses as boolean true;
// otherwise the command is treated as read-only.
func CanMutateBleveIndex(c *cobra.Command) bool {
	value, annotated := c.Annotations[canMutateBleveIndex]
	if !annotated {
		return false
	}
	mutates, err := strconv.ParseBool(value)
	return err == nil && mutates
}
// RootCmd represents the base command when called without any subcommands.
//
// Its PersistentPreRunE opens the index at args[0], with "read_only" taken
// from DefaultOpenReadOnly, and stores it in idx; its PersistentPostRunE
// closes idx. Both hooks return immediately for commands whose Use is
// "bash", "zsh", "fish" or "powershell".
var RootCmd = &cobra.Command{
	Use:   "bleve",
	Short: "command-line tool to interact with a bleve index",
	Long:  `Bleve is a command-line tool to interact with a bleve index.`,
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		switch cmd.Use {
		case "bash", "zsh", "fish", "powershell":
			// Not applicable to cobra's completion subcommands
			return nil
		}

		if len(args) < 1 {
			return fmt.Errorf("must specify path to index")
		}

		var err error
		idx, err = bleve.OpenUsing(args[0], map[string]interface{}{
			"read_only": DefaultOpenReadOnly,
		})
		if err != nil {
			return fmt.Errorf("error opening bleve index: %v", err)
		}
		return nil
	},
	PersistentPostRunE: func(cmd *cobra.Command, args []string) error {
		switch cmd.Use {
		case "bash", "zsh", "fish", "powershell":
			// Not applicable to cobra's completion subcommands
			return nil
		}

		if err := idx.Close(); err != nil {
			return fmt.Errorf("error closing bleve index: %v", err)
		}
		return nil
	},
}
// Execute runs RootCmd. If the command returns an error, the error is
// printed to standard output and the process exits with status -1.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
	err := RootCmd.Execute()
	if err == nil {
		return
	}
	fmt.Println(err)
	os.Exit(-1)
}
================================================
FILE: cmd/bleve/cmd/scorch/ascii.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"strconv"
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
"github.com/spf13/cobra"
)
// asciiCmd represents the ascii command. With exactly one argument after the
// index path, it parses that argument as a snapshot epoch, loads the
// snapshot and prints the bar chart mergeplan.ToBarChart builds from its
// segments.
var asciiCmd = &cobra.Command{
	Use:   "ascii",
	Short: "ascii prints an ascii representation of the segments in a snapshot",
	Long:  `The ascii command prints an ascii representation of the segments in a given snapshot.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		switch {
		case len(args) < 2:
			return fmt.Errorf("snapshot epoch required")
		case len(args) >= 3:
			// Extra positional arguments: nothing is printed.
			return nil
		}

		epoch, err := strconv.ParseUint(args[1], 10, 64)
		if err != nil {
			return err
		}
		snapshot, err := index.LoadSnapshot(epoch)
		if err != nil {
			return err
		}

		// Re-collect the segments as mergeplan.Segment values for ToBarChart.
		var planSegments []mergeplan.Segment
		for _, s := range snapshot.Segments() {
			planSegments = append(planSegments, s)
		}

		chart := mergeplan.ToBarChart(args[1], 25, planSegments, nil)
		fmt.Printf("%s\n", chart)
		return nil
	},
}
// init registers asciiCmd as a sub-command of the scorch RootCmd.
func init() {
RootCmd.AddCommand(asciiCmd)
}
================================================
FILE: cmd/bleve/cmd/scorch/deleted.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"strconv"
"github.com/spf13/cobra"
)
// deletedCmd represents the deleted command. With exactly one argument after
// the index path, it parses that argument as a snapshot epoch, loads the
// snapshot and prints, per segment, the segment's position and the value
// returned by its Deleted method.
var deletedCmd = &cobra.Command{
	Use:   "deleted",
	Short: "deleted prints the deleted bitmap for segments in the index snapshot",
	Long:  `The delete command prints the deleted bitmap for segments in the index snapshot.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		switch {
		case len(args) < 2:
			return fmt.Errorf("snapshot epoch required")
		case len(args) >= 3:
			// Extra positional arguments: nothing is printed.
			return nil
		}

		epoch, err := strconv.ParseUint(args[1], 10, 64)
		if err != nil {
			return err
		}
		snapshot, err := index.LoadSnapshot(epoch)
		if err != nil {
			return err
		}

		for pos, segmentSnap := range snapshot.Segments() {
			fmt.Printf("%d %v\n", pos, segmentSnap.Deleted())
		}
		return nil
	},
}
// init registers deletedCmd as a sub-command of the scorch RootCmd.
func init() {
RootCmd.AddCommand(deletedCmd)
}
================================================
FILE: cmd/bleve/cmd/scorch/info.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"github.com/spf13/cobra"
)
// infoCmd represents the info command. It prints the document count reported
// by a reader on the scorch index.
var infoCmd = &cobra.Command{
	Use:   "info",
	Short: "info prints basic info about the index",
	Long:  `The info command prints basic info about the index.`,
	RunE: func(cmd *cobra.Command, args []string) (err error) {
		reader, err := index.Reader()
		if err != nil {
			return err
		}
		// Release the reader on every return path. A close failure is
		// reported only when no earlier error is already being returned.
		defer func() {
			if cerr := reader.Close(); cerr != nil && err == nil {
				err = cerr
			}
		}()

		count, err := reader.DocCount()
		if err != nil {
			return err
		}
		fmt.Printf("doc count: %d\n", count)
		// var numSnapshots int
		// var rootSnapshot uint64
		// index.VisitBoltSnapshots(func(snapshotEpoch uint64) error {
		// 	if rootSnapshot == 0 {
		// 		rootSnapshot = snapshotEpoch
		// 	}
		// 	numSnapshots++
		// 	return nil
		// })
		// fmt.Printf("has %d snapshot(s), root: %d\n", numSnapshots, rootSnapshot)
		return nil
	},
}
// init registers infoCmd as a sub-command of the scorch RootCmd.
func init() {
RootCmd.AddCommand(infoCmd)
}
================================================
FILE: cmd/bleve/cmd/scorch/internal.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"strconv"
"github.com/spf13/cobra"
)
// ascii holds the value of the internal command's --ascii flag.
var ascii bool
// internalCmd represents the internal command. With exactly one argument
// after the index path, it parses that argument as a snapshot epoch, loads
// the snapshot and prints each internal key/value pair: as text when the
// ascii flag is set, as hex otherwise.
var internalCmd = &cobra.Command{
	Use:   "internal",
	Short: "internal prints the internal k/v pairs in a snapshot",
	Long:  `The internal command prints the internal k/v pairs in a snapshot.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		switch {
		case len(args) < 2:
			return fmt.Errorf("snapshot epoch required")
		case len(args) >= 3:
			// Extra positional arguments: nothing is printed.
			return nil
		}

		epoch, err := strconv.ParseUint(args[1], 10, 64)
		if err != nil {
			return err
		}
		snapshot, err := index.LoadSnapshot(epoch)
		if err != nil {
			return err
		}

		// Pairs are printed in whatever order ranging over Internal() yields.
		for key, val := range snapshot.Internal() {
			if !ascii {
				fmt.Printf("%x %x\n", key, val)
				continue
			}
			fmt.Printf("%s %s\n", key, string(val))
		}
		return nil
	},
}
// init registers internalCmd with the scorch RootCmd and declares its
// --ascii / -a flag.
func init() {
RootCmd.AddCommand(internalCmd)
internalCmd.Flags().BoolVarP(&ascii, "ascii", "a", false, "print key/value in ascii")
}
================================================
FILE: cmd/bleve/cmd/scorch/root.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"os"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/spf13/cobra"
)
// index is the scorch index opened by RootCmd's PersistentPreRunE; the
// scorch sub-commands read from it.
var index *scorch.Scorch
// RootCmd represents the base command when called without any subcommands.
//
// Its PersistentPreRunE opens the scorch index at args[0] read-only and
// stores it in the package-level index variable. Its PersistentPostRunE
// does nothing.
// NOTE(review): the index opened here is not closed by the post-run hook —
// confirm that is intentional.
var RootCmd = &cobra.Command{
	Use:   "scorch",
	Short: "command-line tool to interact with a scorch index",
	Long:  `Scorch is a command-line tool to interact with a scorch index.`,
	PersistentPreRunE: func(cmd *cobra.Command, args []string) error {
		if len(args) < 1 {
			return fmt.Errorf("must specify path to scorch index")
		}

		opened, err := scorch.NewScorch(scorch.Name, map[string]interface{}{
			"read_only": true,
			"path":      args[0],
		}, nil)
		if err != nil {
			return err
		}

		if err = opened.Open(); err != nil {
			return fmt.Errorf("error opening: %v", err)
		}

		index = opened.(*scorch.Scorch)
		return nil
	},
	PersistentPostRunE: func(cmd *cobra.Command, args []string) error {
		return nil
	},
}
// Execute runs RootCmd. If the command returns an error, the error is
// printed to standard output and the process exits with status -1.
// This is called by main.main(). It only needs to happen once to the rootCmd.
func Execute() {
	err := RootCmd.Execute()
	if err == nil {
		return
	}
	fmt.Println(err)
	os.Exit(-1)
}
================================================
FILE: cmd/bleve/cmd/scorch/snapshot.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"strconv"
seg "github.com/blevesearch/scorch_segment_api/v2"
"github.com/spf13/cobra"
)
// snapshotCmd represents the snapshot command. With no argument after the
// index path it lists the root bolt snapshot epochs. With exactly one, it
// parses that argument as a snapshot epoch, loads the snapshot and prints
// the position and path of every segment that is a persisted segment.
var snapshotCmd = &cobra.Command{
	Use:   "snapshot",
	Short: "info prints details about the snapshots in the index",
	Long:  `The snapshot command prints details about the snapshots in the index.`,
	RunE: func(cmd *cobra.Command, args []string) error {
		switch {
		case len(args) < 2:
			epochs, err := index.RootBoltSnapshotEpochs()
			if err != nil {
				return err
			}
			for _, epoch := range epochs {
				fmt.Printf("snapshot epoch: %d\n", epoch)
			}

		case len(args) < 3:
			epoch, err := strconv.ParseUint(args[1], 10, 64)
			if err != nil {
				return err
			}
			snapshot, err := index.LoadSnapshot(epoch)
			if err != nil {
				return err
			}
			for pos, segmentSnap := range snapshot.Segments() {
				// Segments that are not persisted segments are skipped.
				if persisted, ok := segmentSnap.Segment().(seg.PersistedSegment); ok {
					fmt.Printf("%d %s\n", pos, persisted.Path())
				}
			}
		}
		return nil
	},
}
// init registers snapshotCmd as a sub-command of the scorch RootCmd.
func init() {
RootCmd.AddCommand(snapshotCmd)
}
================================================
FILE: cmd/bleve/cmd/scorch.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package cmd
import (
"github.com/blevesearch/bleve/v2/cmd/bleve/cmd/scorch"
)
// make scorch command-line tool a bleve sub-command
// by registering scorch.RootCmd under this package's RootCmd.
func init() {
RootCmd.AddCommand(scorch.RootCmd)
}
================================================
FILE: cmd/bleve/gendocs.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build ignore
// +build ignore
package main
import (
"fmt"
"github.com/blevesearch/bleve/v2/cmd/bleve/cmd"
"github.com/spf13/cobra/doc"
)
// you can generate markdown docs by running
//
//	$ go run gendocs.go
//
// this also requires doc sub-package of cobra
// which is not kept in this repo
// you can acquire it by running
//
//	$ gvt restore
//
// main writes the markdown documentation tree for cmd.RootCmd into the
// current directory. Cross-references are emitted as relref shortcodes and
// nothing is prepended to the generated files. A generation failure aborts
// the program with a panic instead of being silently dropped.
func main() {
	cmd.RootCmd.DisableAutoGenTag = true

	// linkHandler turns a generated file name into a relref shortcode.
	linkHandler := func(s string) string {
		return fmt.Sprintf(`{{< relref "docs/%s" >}}`, s)
	}
	// filePrepender adds no header to the generated files.
	filePrepender := func(s string) string { return "" }

	if err := doc.GenMarkdownTreeCustom(cmd.RootCmd, "./", filePrepender, linkHandler); err != nil {
		panic(fmt.Errorf("error generating markdown docs: %v", err))
	}
}
================================================
FILE: cmd/bleve/main.go
================================================
// Copyright © 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package main
import (
"github.com/blevesearch/bleve/v2/cmd/bleve/cmd"
// to support standard set of build tags
_ "github.com/blevesearch/bleve/v2/config"
)
// main is the entry point of the bleve command; it hands control straight to
// cmd.Execute and does nothing else itself.
func main() {
cmd.Execute()
}
================================================
FILE: config/README.md
================================================
# Bleve Config
**NOTE**: You probably do not need this package. It is intended only for general-purpose applications that want to include large parts of Bleve regardless of whether the code uses them directly.
## General Purpose Applications
A general-purpose application that must let users choose Bleve components at runtime can accomplish this by importing the package for its side effects:
```
import _ "github.com/blevesearch/bleve/v2/config"
```
================================================
FILE: config/config.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package config
import (
// token maps
_ "github.com/blevesearch/bleve/v2/analysis/tokenmap"
// fragment formatters
_ "github.com/blevesearch/bleve/v2/search/highlight/format/ansi"
_ "github.com/blevesearch/bleve/v2/search/highlight/format/html"
// fragmenters
_ "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
// highlighters
_ "github.com/blevesearch/bleve/v2/search/highlight/highlighter/ansi"
_ "github.com/blevesearch/bleve/v2/search/highlight/highlighter/html"
_ "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
// char filters
_ "github.com/blevesearch/bleve/v2/analysis/char/asciifolding"
_ "github.com/blevesearch/bleve/v2/analysis/char/html"
_ "github.com/blevesearch/bleve/v2/analysis/char/regexp"
_ "github.com/blevesearch/bleve/v2/analysis/char/zerowidthnonjoiner"
// analyzers
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/web"
// token filters
_ "github.com/blevesearch/bleve/v2/analysis/token/apostrophe"
_ "github.com/blevesearch/bleve/v2/analysis/token/camelcase"
_ "github.com/blevesearch/bleve/v2/analysis/token/compound"
_ "github.com/blevesearch/bleve/v2/analysis/token/edgengram"
_ "github.com/blevesearch/bleve/v2/analysis/token/elision"
_ "github.com/blevesearch/bleve/v2/analysis/token/keyword"
_ "github.com/blevesearch/bleve/v2/analysis/token/length"
_ "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
_ "github.com/blevesearch/bleve/v2/analysis/token/ngram"
_ "github.com/blevesearch/bleve/v2/analysis/token/reverse"
_ "github.com/blevesearch/bleve/v2/analysis/token/shingle"
_ "github.com/blevesearch/bleve/v2/analysis/token/stop"
_ "github.com/blevesearch/bleve/v2/analysis/token/truncate"
_ "github.com/blevesearch/bleve/v2/analysis/token/unicodenorm"
_ "github.com/blevesearch/bleve/v2/analysis/token/unique"
// tokenizers
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/exception"
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/regexp"
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/web"
_ "github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace"
// date time parsers
_ "github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/iso"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/optional"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/percent"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/sanitized"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds"
_ "github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds"
// languages
_ "github.com/blevesearch/bleve/v2/analysis/lang/ar"
_ "github.com/blevesearch/bleve/v2/analysis/lang/bg"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ca"
_ "github.com/blevesearch/bleve/v2/analysis/lang/cjk"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ckb"
_ "github.com/blevesearch/bleve/v2/analysis/lang/cs"
_ "github.com/blevesearch/bleve/v2/analysis/lang/da"
_ "github.com/blevesearch/bleve/v2/analysis/lang/de"
_ "github.com/blevesearch/bleve/v2/analysis/lang/el"
_ "github.com/blevesearch/bleve/v2/analysis/lang/en"
_ "github.com/blevesearch/bleve/v2/analysis/lang/es"
_ "github.com/blevesearch/bleve/v2/analysis/lang/eu"
_ "github.com/blevesearch/bleve/v2/analysis/lang/fa"
_ "github.com/blevesearch/bleve/v2/analysis/lang/fi"
_ "github.com/blevesearch/bleve/v2/analysis/lang/fr"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ga"
_ "github.com/blevesearch/bleve/v2/analysis/lang/gl"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hi"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hr"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hu"
_ "github.com/blevesearch/bleve/v2/analysis/lang/hy"
_ "github.com/blevesearch/bleve/v2/analysis/lang/id"
_ "github.com/blevesearch/bleve/v2/analysis/lang/in"
_ "github.com/blevesearch/bleve/v2/analysis/lang/it"
_ "github.com/blevesearch/bleve/v2/analysis/lang/nl"
_ "github.com/blevesearch/bleve/v2/analysis/lang/no"
_ "github.com/blevesearch/bleve/v2/analysis/lang/pl"
_ "github.com/blevesearch/bleve/v2/analysis/lang/pt"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ro"
_ "github.com/blevesearch/bleve/v2/analysis/lang/ru"
_ "github.com/blevesearch/bleve/v2/analysis/lang/sv"
_ "github.com/blevesearch/bleve/v2/analysis/lang/tr"
// kv stores
_ "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
_ "github.com/blevesearch/bleve/v2/index/upsidedown/store/goleveldb"
_ "github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
_ "github.com/blevesearch/bleve/v2/index/upsidedown/store/moss"
// index types
_ "github.com/blevesearch/bleve/v2/index/upsidedown"
)
================================================
FILE: config.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"expvar"
"io"
"log"
"time"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/html"
index "github.com/blevesearch/bleve_index_api"
)
// bleveExpVar is the expvar map created under the name "bleve"; init records
// the "bootDuration" and "indexes" entries in it.
var bleveExpVar = expvar.NewMap("bleve")
// configuration groups the library-level settings exposed through the
// package variable Config.
type configuration struct {
// Cache is the registry cache; newConfiguration fills it with
// registry.NewCache().
Cache *registry.Cache
// DefaultHighlighter names the default highlighter; init sets it to html.Name.
DefaultHighlighter string
// DefaultKVStore names the default KV store; init sets it to "" and then
// calls initDisk, which may assign a different name.
DefaultKVStore string
// DefaultMemKVStore names the default in-memory KV store; init sets it to
// gtreap.Name.
DefaultMemKVStore string
// DefaultIndexType names the default index type; init sets it to scorch.Name.
DefaultIndexType string
// SlowSearchLogThreshold is not assigned anywhere in this file.
// NOTE(review): presumably the duration above which a search is logged as
// slow — confirm against the search code.
SlowSearchLogThreshold time.Duration
// analysisQueue is created by index.NewAnalysisQueue(4) in newConfiguration
// and swapped out by SetAnalysisQueueSize.
analysisQueue *index.AnalysisQueue
}
// SetAnalysisQueueSize swaps the analysis queue for a new one built with
// index.NewAnalysisQueue(n). Any queue already installed is closed first.
func (c *configuration) SetAnalysisQueueSize(n int) {
	if previous := c.analysisQueue; previous != nil {
		previous.Close()
	}
	replacement := index.NewAnalysisQueue(n)
	c.analysisQueue = replacement
}
// Shutdown calls SetAnalysisQueueSize(0): the current analysis queue, if any,
// is closed and a new queue created with size 0 takes its place.
func (c *configuration) Shutdown() {
c.SetAnalysisQueueSize(0)
}
// newConfiguration returns a configuration holding a fresh registry cache and
// an analysis queue built with index.NewAnalysisQueue(4); every other field
// keeps its zero value.
func newConfiguration() *configuration {
	cfg := new(configuration)
	cfg.Cache = registry.NewCache()
	cfg.analysisQueue = index.NewAnalysisQueue(4)
	return cfg
}
// Config contains library level configuration.
// It is assigned in this package's init function from newConfiguration and
// then given its default highlighter, KV store and index type names.
var Config *configuration
// init builds the package-level Config with its defaults, publishes the boot
// duration and the index statistics through bleveExpVar, and finally lets
// initDisk apply its build-specific settings.
func init() {
	started := time.Now()

	// Start from a fresh configuration, then fill in the default component
	// names: highlighter, KV store (none yet), in-memory KV store, index type.
	Config = newConfiguration()
	Config.DefaultHighlighter = html.Name
	Config.DefaultKVStore = ""
	Config.DefaultMemKVStore = gtreap.Name
	Config.DefaultIndexType = scorch.Name

	// The recorded duration covers only the configuration set-up above.
	bleveExpVar.Add("bootDuration", int64(time.Since(started)))

	indexStats = NewIndexStats()
	bleveExpVar.Set("indexes", indexStats)

	initDisk()
}
// logger receives the package's log output; until SetLog installs another
// destination everything written to it is discarded.
var logger = log.New(io.Discard, "bleve", log.LstdFlags)

// SetLog sets the logger used for logging.
// By default log messages are sent to io.Discard.
//
// Passing nil restores that discarding default rather than storing a nil
// logger, because calling a method on a nil *log.Logger panics.
func SetLog(l *log.Logger) {
	if l == nil {
		l = log.New(io.Discard, "bleve", log.LstdFlags)
	}
	logger = l
}
================================================
FILE: config_app.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build appengine || appenginevm
// +build appengine appenginevm
package bleve
// in the appengine environment we cannot support disk based indexes
// so we do no extra configuration in this method
//
// initDisk is therefore an intentional no-op in builds tagged appengine or
// appenginevm.
func initDisk() {
}
================================================
FILE: config_disk.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !appengine && !appenginevm
// +build !appengine,!appenginevm
package bleve
import "github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
// in normal environments we configure boltdb as the default storage
//
// initDisk sets Config.DefaultKVStore to boltdb.Name; this variant is compiled
// when neither the appengine nor the appenginevm build tag is set.
func initDisk() {
// default kv store
Config.DefaultKVStore = boltdb.Name
}
================================================
FILE: data/test/sample-data.json
================================================
[{"title":"Edinburgh/Leith","name":"Ocean Apartments","address":"2 Western Harbour Midway","directions":null,"phone":"+44 131 553 7394","tollfree":null,"email":null,"fax":null,"url":"http://www.oceanservicedapts.com","checkin":"15H00","checkout":"10H00","price":"from £70","geo":{"lat":55.9812,"lon":-3.2248,"accuracy":"RANGE_INTERPOLATED"},"type":"hotel","id":8576,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"In my personal opinion, this hotel is one of the many hidden gems of Istanbul. Located in an area not very easy to get if you are driving yourself. I would advise taking a taxi to get there. The service from the first \"hello\" until the last \"bye\" was impecable. The terrace restaurant wiew is second to none. The food was excellent. The staff was very nice. In short, next time I am going to Isnbul, I dont believe I will stay anywehere else. Highly recomended.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Lindsey Wiegand III","date":"2013-01-01 16:30:13 +0300"},{"content":"When you first arrive at TomTom Suites you might wonder where you are coming to as the area looks a little run down! But it is in a great location in a street with no passing traffic. Its so quiet. Access to the main sites is easy. Downhill, the tram is within a 5 minute walk and uphill, Iskatel Cadesi ( a buzzing main street with loads of shops and restaurants) and a 2 minute walk round the corner leads to some narrow streets full of atmosphere and restaurants This boutique hotel itself is a sea of tranquility with beautifully appointed rooms, a great breakfast and very helpful staff. Our only regret was that the Terrace bar wasn't open but as the weather was unseasonably inclement it wasn't a problem. 
We had a great time and would definitely recommend the TomTom Suites","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":4.0},"author":"Bulah Weissnat","date":"2013-06-01 17:06:53 +0300"},{"content":"Staying at Tomtom was the best possible choice we could have made, everything was simply perfect: the location, the facilities, the staff, the quality of the food, the elegance and modern style of the furniture, the superb view of the terrace where we had breakfast as well as a wonderful dinner and a lovely evening. Many details made us feel at home, such as the Ipod dock in the rooms and the Ipads we could use free of charge during breakfast. Most of all, the friendliness of the staff, all of them extremely helpful. I'd highlight Chiara, who gave us many tips, especially telling us to go to Bagdah st., on the Asian side, a place great place that no books mentioned; and Ali, a wonderful guy, much more than a concierge, and a perfect host who did everything he could to make our stay as good as it gets. the proximity to Istklal street, and the tram make the location convenient for both day and night. Considering everything, including the fair rates cherged, I doubt there's a better choice in town.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Ottis Pacocha","date":"2012-08-07 08:16:49 +0300"},{"content":"A really wonderful hotel in a superb location on a traffic free road. It was so peaceful it was easy to forget that we were in the middle of one of the world's busiest cities. The staff were deligthful and couldn't do enough to help. The hotel is in a converted monastary which has been sympathetically refurbished, the artwork depicting istanbul brings a touch of colour - especially in the lift shaft. Our room was enormous and well appointed with a luxurious large bathroom - and yes, a jacuzzi bath. 
The rooftop restaurant and bar was a perfect way to relax at the end of the day and must have some of the best views in Istanbul over the Bosphorous and the Golden Horn. The food was excellent, modern turkish using french cooking techniques and local produce. Overall I can't wait to go back and I can't recommend the place highly enough.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Dr. Amira Murazik","date":"2013-10-14 03:30:16 +0300"},{"content":"My wife, mother and I really enjoyed our recent stay at TomTom. We always prefer smaller boutique places that are well designed and in actual neighborhoods vs. larger impersonal luxury hotels. TomTom didn't dissapoint! As previously mentioned, beautifully designed and furnished rooms, the best toiletries, etc. The roof deck is very nice with a great view. The location is amazing, in-between everywhere that you want to go but far enough away from the touristy madness. I'd argue it's one of the best situated hotels in Istanbul. As mentioned, it's a very steep but quick walk up to the main street of Isklal. However, this can be challenging if you have any mobility issues. In general, the area is rather hilly but so is most of Istanbul. In the other direction, you can walk up into a neighborhood renowned for antiques and nice cafes or down to the tram that takes you to the main tourist attractions or the road to get to Ortokoy or Bebek. At no point does it feel like you are in the \"tourist district\" but you never feel unwelcome or out of place. As for noise, we found the building itself to be quiet and well insulated. You didn't hear noise from the rooms around you and minimal noise from the room above. Nowhere near as bad as what an earlier reviewer described. The only real noise to speak of does come from the kids in the morning and the afternoon who walk on the dead-end road to and from school. 
Other than this, and some kids playing soccer on occasion, the street outside of the hotel is pretty quiet Breakfast in the morning is great. Freshly baked pastries (done at the hotel) and fruit, cheeses and meats, served by extremely friendly staff. Fuat in particular is a delight to interact with and was extremely helpful. The staff overall is very friendly and accommodating. Everyone at the front desk regardless of time of day did whatever they could to make sure we were receiving the best service possible. There are however a few things that need improvement to make the hotel even better. -While breakfast is excellent, we were less than impressed with dinner. To the point that we ate there once and did not choose to repeat the experience. The service was good but the quality of the food and the cooking was pretty bad esp. considering the prices of the food vs. what can be had in the neighborhood for much less. We are admitted foodies, but we didn't hold the hotel restaurant up to lofty expectations. This needs to be addressed. -While the staff is very friendly and eager to help, the hotel would benefit from a dedicated concierge or someone who really knows about the various restaurants around the city. With a hotel of this caliber, we expected them to be more informed about different places and to let us know how traffic can affect getting there, etc. If you didn't ask them these kinds of questions, it didn't occur to them to tell you. -Lastly, with the windows closed in the room there is zero airflow without having the heat on and it's extremely dry. I understand them not wanting to run the AC unless the temperature warrants, but there should be a way to circulate air without opening the windows in the spring (see my earlier comment about noise from school kids in the morning). 
In conclusion, we wouldn't hesitate to recommend TomTom to friends and hope to stay there again the next time we are in Istanbul.","ratings":{"Service":4.0,"Cleanliness":5.0,"Overall":5.0,"Value":4.0,"Sleep Quality":4.0,"Rooms":5.0,"Location":5.0},"author":"Marcelle Haley","date":"2015-07-17 19:17:23 +0300"},{"content":"We had been at Istanbul for business purposes and used to stay at the well-known brands at Taxim. This time the trip was for leisure and we were looking for a charming, cozy, friendly, clean, staff friendly boutique hotel. So, THIS IS TomTom suites and we consider ourselves very lucky to stay there. Due to the Italian embassy its socak is not crowded and safe 24/7. Breakfast variety was very satisfactory, view from terrace wonderful, staff very friendly, very quiet and clean. If you are looking for a hotel with ID, we strongly recommend it. Be aware that Istiklal street is only 5 minute walking, but uphill.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Location":4.0,"Rooms":5.0},"author":"Peggie Little","date":"2014-04-22 04:05:24 +0300"}],"public_likes":["Ms. Braulio Kuhic"],"vacancy":true,"description":"Modern, stylish contemporary serviced apartments 4 miles for Edinburgh's city Centre.","alias":"Serviced Apartments","pets_ok":false,"free_breakfast":true,"free_internet":false,"free_parking":true}
,{"title":"Edinburgh/Old Town","name":"Euro Hostel Edinburgh","address":null,"directions":null,"phone":"+44 8454 900 461","tollfree":null,"email":null,"fax":null,"url":"http://www.euro-hostels.co.uk/Edinburgh_hostel/","checkin":null,"checkout":null,"price":null,"geo":{"lat":55.94825,"lon":-3.18805,"accuracy":"APPROXIMATE"},"type":"hotel","id":8661,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"A plain and simple hotel, located on a busy street with many company offices nearby I can't imagine a tourist staying here. It is far away from the city center, and anything that a tourist might be interested in. In general, you will need a car, or a taxi to get to anything unless your business office is within walking distance. About €25 from the airport via taxi, this fairly modern looking hotel is very plain and simple in desgin, layout and comforts. Functional, and at a rate of about €85 per night - it was almost a bargain. Surprisingly quite given the location on the street, my \"no smoking\" room was clearly smoked in. Upon complaining - it was explained to me that the room was in fact no-smoking. No offer to move me, no thought that there might be an issue .... who knows, perhaps they had heard this before. Breakfast included, a simple breakfast that was OK in general, and nothing special. 
I would only recommend staying here if you are close enough to what you need to go to that you can walk.","ratings":{"Service":3.0,"Cleanliness":2.0,"Overall":2.0,"Value":3.0,"Sleep Quality":2.0,"Rooms":2.0,"Location":1.0},"author":"Rachel O'Hara","date":"2014-03-12 14:29:07 +0300"},{"content":"We (2 couples) recently stayed at the Hotel for 4 nights in their standard rooms -- 1 room off the garden and 1 in the main building.Both rooms were on the small side but the quality of the rooms more than compensated for their size.Breakfast was excellent.The hotels main asset in our eyes were all their staff who were very professional and looked after all our needs exceptionally well.The location of the hotel is excellent close to shops sights and restaurants. If you are travelling to Paris for a short trip i would recommend staying here.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":4.0,"Value":4.0,"Sleep Quality":5.0,"Rooms":3.0,"Location":5.0},"author":"Viola Reinger","date":"2012-09-21 02:53:29 +0300"}],"public_likes":["Laila Jacobs","Dr. Andreane Berge","Ophelia Walter","Mac Hackett","Belle Bartell","Spencer Erdman","Elna Monahan","Shanelle Hayes","Ms. Wallace Larkin"],"vacancy":false,"description":"Kincaid's Court, Guthrie Street. In Cowgate, open every summer from June 8th until September 2nd. Budget accommodation in 43 apartments used as student residences during term time.","alias":null,"pets_ok":false,"free_breakfast":false,"free_internet":true,"free_parking":true}
,{"title":"Edinburgh/Old Town","name":"The Sheraton Grand Hotel","address":null,"directions":null,"phone":"+44 131 229 9131","tollfree":null,"email":null,"fax":null,"url":null,"checkin":null,"checkout":null,"price":null,"geo":{"lat":55.947,"lon":-3.2073,"accuracy":"APPROXIMATE"},"type":"hotel","id":8662,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"Really loved this hotel, it was beautifully decorated (very much interior designed) and was spotlessly clean. We had a booked a superior double and when we arrived we were told we'd been upgraded. The room was fairly large by Paris standards and had a day bed, which I assume could sleep a 3rd person/child. The bathroom was really modern and had a bath and large separate shower with large overhead shower head, plus hand held shower. All the decor was tasteful and we were at the back of the hotel overlooking the small courtyard garden, so very quiet, although the hotel is on a quiet street anyway. Short walk to the Jardin du Luxembourg and less that 5 mins to a metro, which was on a direct line to the Gare du Nord, so perfect for us as we took the Eurostar from London. Hotel booked us a table at a great restaurant, superb food, which they recommended. Breakfast was served to order and you got croissants, pain au chocolat, bread and could choose omelettes etc. The fruit salad was freshly made, the yoghurts were the posh ones in glass jars. Would really recommend the hotel, but not for small children - there are a lot of carefully placed vases and objets d'art that little fingers will want to touch...","ratings":{"Cleanliness":5.0,"Sleep Quality":5.0,"Overall":5.0,"Value":4.0,"Service":5.0},"author":"Brittany Ledner Jr.","date":"2012-06-21 07:48:16 +0300"},{"content":"I spent a wonderful week at the Villa Madame, finding the staff very helpful and gracious. My 5th floor room was very clean and light, quiet, and comfortable. 
The hotel is extremely well located only a few short blocks to the metro, Jardins du Luxemburg, and shopping. The hotel offered free wifi, and much more surprisingly, free international telephone service from my room via voip. Breakfasts were excellent, and the garden area was quiet and comfortable. Just around the corner is Maison du Jardin, a very excellent small restaurant with prix fixe 31 euro dinners. Staff made other great recommendations as well. I would unhesitatingly recommend and will return.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":4.0,"Sleep Quality":5.0,"Rooms":5.0,"Location":5.0},"author":"Lauren Ortiz","date":"2014-10-01 14:04:03 +0300"},{"content":"My friend and I were backbacking through London and Paris and decided to splurge a little on this hotel. It turned out amazing. You really don't want to risk a hostel in Paris, they are serious dumps. I did a hostel in Paris in 2007. I picked this hotel due to it's proximity to nightlife in the Latin Quarter and it looked nice. The bathroom had a walk in shower with glass door. Flatscreen TV had an aux speaker in the bathroom to hear the tv while doing your thing. Beds were very comfortable and the room we got was big and facing the street. The street was loud during mid afternoon due to the Catholic school playground down the block. We booked a room with two singles and when we went to check in we were told they had inadvertantly given our room away. I started to freak because I was sure we would get a fast one pulled as usually happens in western europe. We were happy that after my face of death look we were upgraded to a larger room with two double beds. Unfortunately for the poor lady who had booked the room they bumped her and I don't know where she was then moved. Overall the staff was slow on check'in. Breakfast was carbs and coffee. They have a person dedicated only to breakfast and coffee however it seems odd because this is a very small hotel. 
We were the youngest guests in the hotel by about 50 years. Everyone else looked like they were late fifties to mid seventee's. We are single thirtysomethings. I would recommend this hotel to anyone who wants a peacefull nights sleep in Paris.","ratings":{"Service":4.0,"Cleanliness":5.0,"Overall":4.0,"Value":2.0,"Location":5.0,"Rooms":4.0},"author":"Turner Ferry","date":"2013-11-29 14:38:35 +0300"},{"content":"The Villa Madame is a lovely, comfortable, well-located boutique hotel with excellent service. My wife and I enjoyed our stay there immensely. This hotel is chic and luxuriously comfortable especially for the price. Admittedly, the room we had (Classic Double Room) was the smallest hotel room my wife and I had ever seen at about 3m x 3.5m (10' x 12'). The large outdoor terrace, fantastic bathroom (with Hermès products and great water pressure), deliciously comfortable bed, iPod docking station, super-fast free wi-fi and excellent service more than made up for the tiny room. Alex at the front desk was a delight as she answered our every question and was always happy to chat and share information. The location is excellent being only a few minutes walk from either the Rennes or St-Sulpice Metro stops, 100m from the beautiful Jardin du Luxembourg and within easy walking distance of the myriad of shops and restaurants on both the Rue de Rennes and Boulevard Saint-Germain. The hotel serves a decent continental breakfast which seems expensive at 18€ but we found a package that was below the normal rate and included breakfast. The breakfast room, as other reviewers have noted, is small and has only four tables for two. All of the tables were occupied each time we went for breakfast but the hotel happily served us as we sat in the little lounge area so it wasn't ever a problem. They don't serve lunch or dinner but there are two brasseries within 50m of the front door and many more restaurants and coffee shops just a few blocks away. 
There were a few other minor concerns that I had regarding this hotel: - The minibar is stocked only with two bottles of water. It would be nice if there was a limited selection of other beverages and also a kettle for tea/coffee service. - The satellite television has almost 900 channels (yes, I went through them all late one night) but half of them are Arabic and almost all of the rest are in French save for one German language station, a couple of Italian stations and only Bloomberg for the English speakers. Although one doesn't go to Paris to watch TV in the room, it's nice to relax and watch the news or a movie after a long day on the town but unless the above suits you, you won't have much cause to even turn on the set. - The street is very small and it is very difficult to find parking. This is likely not a problem for most visitors but is something to keep in mind especially if you rent a car. Overall, my wife and I loved this hotel. The few cons are heavily outweighed by the comforts we enjoyed. We are very seasoned travelers and despite the tiny room, this hotel experience was one of our best ever. We will definitely stay at the Villa Madame again.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":5.0,"Value":5.0,"Location":5.0,"Rooms":5.0},"author":"Blaze Williamson","date":"2014-04-26 13:59:54 +0300"}],"public_likes":["Madelynn Littel","Marielle Daugherty","Micah Stiedemann","Sandra Howe","Angela Oga"],"vacancy":true,"description":"21 Festival Square. Against the backdrop of majestic Edinburgh Castle, the Sheraton Grand Hotel and Spa combines city centre convenience with warm Scottish hospitality.","alias":null,"pets_ok":false,"free_breakfast":true,"free_internet":false,"free_parking":false}
,{"title":"Edinburgh/Old Town","name":"Radisson Blu Hotel","address":"80 High St","directions":null,"phone":"+44 131 557 9797","tollfree":null,"email":null,"fax":null,"url":"http://www.radissonblu.co.uk/hotel-edinburgh","checkin":null,"checkout":null,"price":null,"geo":{"lat":55.95014,"lon":-3.18667,"accuracy":"ROOFTOP"},"type":"hotel","id":8663,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[],"public_likes":[],"vacancy":false,"description":"The Royal Mile. Less than a five-minute walk from major shopping and business districts, and the Edinburgh International Conference Centre is only a short taxi ride away.","alias":null,"pets_ok":false,"free_breakfast":true,"free_internet":true,"free_parking":false}
,{"title":"Edinburgh/Old Town","name":"Hotel Missoni","address":null,"directions":null,"phone":"+44 131 220 6666","tollfree":null,"email":null,"fax":null,"url":"http://www.hotelmissoni.com/hotelmissoni-edinburgh","checkin":null,"checkout":null,"price":null,"geo":{"lat":55.9491,"lon":-3.19275,"accuracy":"APPROXIMATE"},"type":"hotel","id":8664,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"We stayed for 3 nights in March, 2 consecutive and then one at the end of the trip. The area near the Termini station is not the prettiest, but it is very convenient if you are using public transportation. My suggestion when exiting Termini station is to go RIGHT and walk down about 4 blocks and then right again and over 2 blocks. We were travelling with quite a bit of luggage and we ended up going the wrong way too many times. We had no problems in the area and loved the convenience of the location. We walked to Termini to either pick up the Metro train or a bus to most all of the sites. The room was spotless and the Breakfast was delicious. Assunta - the owner, could not have been more helpful, although her sense of direction is not like ours in America. Just a short walk maybe a lot longer than we Americans are used to. Hoping on the #70 bus gets you to almost any tourist site (or close to) and most buses all head back to Termini.","ratings":{"Service":5.0,"Cleanliness":5.0,"Overall":4.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":3.0,"Location":3.0},"author":"Hermina Schinner","date":"2015-03-06 22:56:15 +0300"},{"content":"This motel may not be the Ritz Carlton but if your looking for value, a great location and safe neighborhood then you've chosen the right place! It is right across the street from the CBS studios, the line for the price is right is just around the corner. My son and I stayed there for a week and received great service from the maids right on to the front desk. 
The front desk was more helpful than any concierge I have ever seen and they are just a wonderful hard working family. If I ever go back to LA I will definitely stay at the Beverly Inn even if it's only for the friendly service.","ratings":{"Service":5.0,"Cleanliness":4.0,"Overall":4.0,"Value":5.0,"Sleep Quality":5.0,"Rooms":4.0,"Location":5.0},"author":"Jose Swaniawski Sr.","date":"2014-02-12 20:15:16 +0300"},{"content":"This place is car motel that has seen much better days; the beds are old and offer no support, the televisions, carpets, and furnishings are likewise well-used, and the overall effect can be somewhat depressing. However, it does offer limited parking and convenient location at a very inexpensive rate and the bathrooms were clean; it attracts tourists who are more interested in the surrounding neighborhoods (which are quite nice with wonderful restaurants) and less interested in where they stay for the night. There was a cafe down the street when I was there in 2002, it's along major bus lines, with a terrific market close by across the street. It is also right across from the CBS studios, and about four blocks walking from an old-style diner and bakery. The comments on other sites focus on the ethnic background of the clerks and manager, but I found them pleasant and accommodating. That is why I actually felt safe there as a single female traveler (although I suspect others would not feel this way). This is just slightly above average for what you would expect for the price, but you do get what you pay for.","ratings":{"Cleanliness":1.0,"Overall":2.0,"Value":4.0,"Service":3.0,"Rooms":2.0},"author":"Mr. Ellis Heller","date":"2012-03-02 00:20:56 +0300"},{"content":"Dirty, cock roach infested, unsafe, and very noisy. This motel has not been updated in 25 years or more. It is very noisy (even with ear plugs) because you are a stone throw away from the Dolphin Expressway. The carpet and ceramic floors are filthy along with the furniture and bedding. 
Someone tried entering my room in the middle of the night, thank goodness for deadbolt and door chain. Photos online are very deceiving. RECOMMEND - DO NOT STAY AT THIS MOTEL. This motel should not be part of the Choice Hotel chain. Should be called \"Last Choice\".","ratings":{"Service":2.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":1.0,"Location":1.0},"author":"Lottie Gerhold IV","date":"2014-06-12 22:56:30 +0300"},{"content":"Creepy, dirty, dark, depressing. It looks like the stereotypical motel in the movies where a drug deal goes bad and people get murdered. Rooms smell of chemical perfumed disinfectant and it burns your nose and lungs... but you will be too afraid for your safety to open a window or door! We stayed at this hotel from 3pm to 10pm (never actually slept overnight, thank god!) because we had a late night flight and it was pouring rain in Miami. The hotel says it's \"newly renovated\" and had free WIFI, so we figured it would be a nice place to spend a few hours and relax. HA! NOOOO! We laid on top of the beds, just to watch tv and instantly became itchy. Nothing about this hotel was \"newly renovated.\" The WIFI was extremely slow. The main picture of this hotel is deceiving. It looks a LOT worse and run-down in real life! Don't bother with this place.","ratings":{"Service":2.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":1.0},"author":"Niko Keebler","date":"2014-12-12 12:32:09 +0300"},{"content":"We didn't stay at the hotel so I can't comment on the rooms. We did leave our car there while we went on a cruise ($5/night parking.) While we were gone someone siphoned ALL of the gas out of our vehicle. I have called the manager twice to alert her to the problem. I've left messages concerning \"a security issue at the hotel\" and no one has returned my call. I guess they don't care about security. 
Next time I will spend the $20/day to park at the port since that seems to be the only secure parking to be had.","ratings":{"Overall":1.0},"author":"Miss Alysha Goldner","date":"2012-01-17 00:40:45 +0300"},{"content":"I stayed at this motel for one night with my partner in August 2010. We had a flight early in the morning from Miami airport so we wanted a hotel close to the airport. I booked this through BOOKING.COM and payed £45. I have never stayed in Miami before so we did not know what areas were good and what was bad. When we checked in I wasa little concerned as there was a security hut and a guard at the door. We checked in and it was very run down and dirty. They asked for a credit card but I insisted on paying cash as there was no way I was going to hand over my card details. We parked our car and tried to find our room. The halls were all outdoors and very run down. There was a sign saying it had just had a refurb. I could not see where from the outside. It seemed that the area in Miami the motel was in was not a very good one, this concerned us a little. We finally found our room and went in. The room was very basic and smelt of damp. The dead lock on the door was broken and there was no safe in the room. We felt that uncomfortable that at night we pushed our suit cases up to the door. The shower had no presure and was only luke warm and the paint was peeling off from the bathroom. What really appauld us was the floor. I took my shoes and socks off and was walking around in bare foot. After two minutes my partner said \"LOOK AT YOUR FEET\". They were black. The floor was that dirty that in 2 mins my feet were black. I wet a towel and rubbed it along the floor. The towel changed to black and the carpet changed colour. The floor could do with a really good clean. The TV reception was very poor and fuzzy and we gave up in the end. Also the internet/Wifi had little/no signal. It was noisey outside and there were Police sirens sounding all night outside. 
We found it hard to sleep. We only stayed the night as we had to get up at 4 am to check in for our flight and we checked in at 8pm that evening. Otherwise we would have moved hotel. The only plus side was it was close to the airport. We did not stay for the breakfast but if itn was anything like the room we would have passed anyway. We did not use the pool as it was on the otherside of the motel right next to a main highway. It looked dirty and very uninviting. Please only stay at this motel if you have to or if it is free and you are feeling brave.","ratings":{"Service":1.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":2.0,"Location":3.0},"author":"Miss Weldon Flatley","date":"2015-05-21 14:38:02 +0300"}],"public_likes":["Ms. Jaleel Bartell","Rodger Jerde","Hanna Simonis"],"vacancy":false,"description":"1 George IV Bridge. Situated on the Royal Mile and designed by Rosita Missoni.","alias":null,"pets_ok":true,"free_breakfast":true,"free_internet":false,"free_parking":false}
,{"title":"Edinburgh/South","name":"Argyle Backpackers","address":"14 Argyle Pl","directions":"The number 41 bus (catch it outside Waverley railway station) goes right past the front door.","phone":"+44 131 667 9991","tollfree":null,"email":null,"fax":null,"url":"http://www.argyle-backpackers.co.uk/","checkin":null,"checkout":null,"price":"Dorm from £13","geo":{"lat":55.9385,"lon":-3.1912,"accuracy":"ROOFTOP"},"type":"hotel","id":8685,"country":"United Kingdom","city":"Edinburgh","state":null,"reviews":[{"content":"Made a one night reservation at this \"hotel\" without checking the reviews, big mistake. The hotel looks OK from the highway and looked conveniently located. Should have known when the very unfriendly girl at the front desk gave me the room key wrapped in a post-it note because they had run out of the little envelopes. The room smelled weird, when we opened the chest drawers we saw what looked like small roaches, we never took anything out of the suitcase. It was very cold that night in Miami and the heating unit blew cold air. The next morning we woke up to find out there was no hot water in the whole hotel, after many calls and trips to the front desk I was informed that there was someone coming to fix the boiler. To make a long story short, the hot water came back on at 12:00pm; checkout time is 11:00am. They wanted us to leave without taking a shower, the maid was also annoyed at us because we didn't leave the room and she had to finish to go home. Later that night while at a restaurant in South Beach, I noticed some itchy bumps on the left side of the back of my neck and scalp, as well as the knuckles of my fingers, they turned out to be BED BUG BITES!!!","ratings":{"Service":1.0,"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Sleep Quality":1.0,"Rooms":1.0,"Location":4.0},"author":"Consuelo Thiel","date":"2013-03-28 07:41:35 +0300"},{"content":"Restaurant service was o.k. Your continental breakfast is a joke. We were there 2 nights. 
We had 4 rooms which were reserved in August 2008. The last couple to get there Friday niight got a terrible room. Nothing in it but the bed. They wre given a different room the next morning but the damage was done. No we will not be back.","ratings":{"Cleanliness":1.0,"Overall":1.0,"Value":1.0,"Service":1.0,"Rooms":1.0},"author":"Ettie Bartell","date":"2012-09-04 06:14:54 +0300"},{"content":"Disappointing, after all these years... I have been staying at the Capri since 2001, when my family moved from SF to the North Bay. It used to be a great deal for us ex-pats and while not a luxury hotel, it was clean and comfortable (plus in a great location).My son and I stayed at the Capri this summer and we were terribly let down. They no longer offer specials and while there is renovation occurring, our room was dingy and the bed very uncomfortable. My 9 year old son said, Mom, let's not stay at the Capri anymore. Sadly, I had to agree.","ratings":{"Service":3.0,"Business service":-1.0,"Cleanliness":2.0,"Check in / front desk":3.0,"Overall":2.0,"Value":2.0,"Rooms":2.0,"Location":4.0},"author":"Harry O'Kon I","date":"2014-09-10 11:07:33 +0300"},{"content":"Best Deal in the Marina If you don't mind 1960's decor this place will fit the bill. It's very reasonable and very clean. Sometimes it books up with the Euro tours. Ask to stay on the 3rd floor, with the high ceilings and roof windows. I have actually heard the fog horns at night. There is also plenty of free parking right on-site.This hotel is right off Union street and within walking distance of the Marina Green. Tons of great restaurants and clubs. One of my favorites is the Brazen Head, which is right across the street. A small English Pub with great food and drinks. It's hard to spot but there's a real small sign out front. Also, El Canasta (sp?) 
has a great steak burrito.","ratings":{"Service":-1.0,"Business service":-1.0,"Cleanliness":-1.0,"Check in / front desk":-1.0,"Overall":4.0,"Value":-1.0,"Rooms":-1.0,"Location":-1.0},"author":"Sibyl Lind","date":"2014-09-20 18:19:21 +0300"},{"content":"Bed Bugs and Ants The ants didn't bother me. It was the bed bugs I detested.Before staying at the Buena Vista I didn't know what a bed bug looked. But the spots on my arms that looked like flea bites kept appearing after I got home. So I googled them.Yep, sure enough! Up popped a photo of the same type of bug that I had killed after I found it crawling on my husband's pillow while we were staying at the Buena Vista this January 2008!I ordered some all natural bed bug powder to dust all over my house. Even my kids are showing up with the spots.When I called the hotel after I got home to tell them about it they said they would block off that room. But when I called a day or two later there was someone in that room. It doesn't matter if they block off and treat one room. They've got to do the entire premises.","ratings":{"Service":5.0,"Business service":-1.0,"Cleanliness":1.0,"Check in / front desk":5.0,"Overall":2.0,"Value":5.0,"Rooms":1.0,"Location":5.0},"author":"Deshawn Rippin","date":"2014-10-17 15:46:51 +0300"},{"content":"Not a great place to stay This place is falling apart...was once a nice little place to stay but not now. Lobby was shabby and dirty, room was not better...there was mold in the tub, no movies here to buy on cable, iron in room was broke....etc. 
For the $160 they charged per night ( with a AAA card), I would not go back...there is a very nice small, totally remodeled motel two blocks down that we should have stayed at for the same price and most definitly will next time...It is called Hotel Del Sol...check it out, thehoteldelsol.com....much, much better for the $$$$.","ratings":{"Service":2.0,"Business service":1.0,"Cleanliness":1.0,"Check in / front desk":3.0,"Overall":2.0,"Value":1.0,"Rooms":2.0,"Location":4.0},"author":"Wayne Tremblay III","date":"2012-10-27 01:48:16 +0300"},{"content":"Great Budget Accommodation If you want a small budget sized no frills hotel that offers a resonable level of service then this is for you. The rooms were large and the housekeeping very good. The front desk service was always helpful and friendly with good advice. Traffic noise was not as bad as expected and nor was the beds. Only minus was the continental breakfast American style which differed somewhat from what we experienced in other countries. Too sweet for our taste.Overall a great experience and well located although being closer to eateries would have been appreciated. Can recommend the Liquor Store accross the road. Their staff were great!","ratings":{"Service":3.0,"Business service":-1.0,"Cleanliness":3.0,"Check in / front desk":-1.0,"Overall":3.0,"Value":3.0,"Rooms":4.0,"Location":-1.0},"author":"Margaretta Miller","date":"2012-04-19 20:46:49 +0300"}],"public_likes":["Narciso Wiegand","Graciela Bailey","Kavon Bruen","Aditya Feest","Caleb Medhurst","Ross Rippin","Germaine Kunde"],"vacancy":true,"description":"Two good self-catering kitchens, garden, conservatory/seating area, choice of different sized dorms, and private rooms.Definitely not a party hostel.","alias":null,"pets_ok":true,"free_breakfast":true,"free_internet":true,"free_parking":false}
,{"title":"Abbeville","name":"Chez Mel","alt":null,"address":"63-65 rue Saint-Vulfran","directions":null,"phone":"+33 3 22 19 48 64","tollfree":null,"email":null,"url":null,"hours":null,"image":null,"price":null,"content":"With an old style setting and musical accompaniment, this is a hearty and family-friendly crêpe restaurant. It is also a tea room in the afternoon.","geo":{"lat":50.104437,"lon":1.829432,"accuracy":"RANGE_INTERPOLATED"},"activity":"eat","type":"landmark","id":33,"country":"France","city":"Abbeville","state":"Picardie"}
,{"title":"Aberdour","name":"Aberdour Castle","alt":null,"address":null,"directions":null,"phone":null,"tollfree":null,"email":null,"url":"http://www.historic-scotland.gov.uk/propertyresults/propertyoverview.htm?PropID=PL_001","hours":null,"image":null,"price":null,"content":"Is a fascinating, 12th-century castle which was granted by Robert the Bruce to his friend and nephew, Thomas Randolph, Earl of Moray. It includes the beautiful and well-maintained castle gardens, as well as a spectacular beehive-shaped dovecot built at the end of the sixteenth century.","geo":{"lat":56.0552,"lon":-3.2985,"accuracy":"APPROXIMATE"},"activity":"see","type":"landmark","id":35,"country":"United Kingdom","city":"Aberdour","state":null}
,{"title":"Aberdour","name":"The Silver Sands Beach","alt":null,"address":null,"directions":null,"phone":null,"tollfree":null,"email":null,"url":null,"hours":null,"image":null,"price":null,"content":"is one of Scotland's seven Blue Flag awarded beaches, and is incredibly popular in summer time. For those after a bit of peace and quiet, the '''Black Sands Beach''' may be more to your tastes.","geo":{"lat":56.0544,"lon":-3.2863,"accuracy":"ROOFTOP"},"activity":"see","type":"landmark","id":36,"country":"United Kingdom","city":"Aberdour","state":null}
,{"title":"Aberdour","name":"Aberdour Railway Station","alt":null,"address":null,"directions":null,"phone":null,"tollfree":null,"email":null,"url":null,"hours":null,"image":null,"price":null,"content":"is a beautifully kept and cared for example of a traditional station, and regularly wins the \"Best Station and Gardens in Great Britain\" award.","geo":{"lat":56.05471,"lon":-3.30089,"accuracy":"RANGE_INTERPOLATED"},"activity":"see","type":"landmark","id":37,"country":"United Kingdom","city":"Aberdour","state":null}]
================================================
FILE: doc.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*
Package bleve is a library for indexing and searching text.

The snippets below ignore returned errors for brevity; real code should
check them.

Example Opening New Index, Indexing Data

	message := struct {
		Id   string
		From string
		Body string
	}{
		Id:   "example",
		From: "xyz@couchbase.com",
		Body: "bleve indexing is easy",
	}

	mapping := bleve.NewIndexMapping()
	index, _ := bleve.New("example.bleve", mapping)
	index.Index(message.Id, message)

Example Opening Existing Index, Searching Data

	index, _ := bleve.Open("example.bleve")
	query := bleve.NewQueryStringQuery("bleve")
	searchRequest := bleve.NewSearchRequest(query)
	searchResult, _ := index.Search(searchRequest)
*/
package bleve
================================================
FILE: docs/create_and_search_your_first_index.md
================================================
# Creating a Bleve Index
A simple how-to example using Bleve in Go to create an index, add documents, and run search queries with results.
```go
package main
import (
"fmt"
"log"
bleve "github.com/blevesearch/bleve/v2"
)
// Document is the record type indexed by this example. The json tags give the
// lowercase names ("id", "title", "content") that the search request's Fields
// list and the printed output use to refer to these fields.
type Document struct {
ID string `json:"id"`
Title string `json:"title"`
Content string `json:"content"`
}
// main creates a new index at example.bleve, indexes two sample documents in
// a single batch, runs a query-string search for "bleve" and prints the
// result. Any failure along the way terminates the program via log.Fatal.
func main() {
	indexPath := "example.bleve"

	// Create a new index
	mapping := bleve.NewIndexMapping()
	index, err := bleve.New(indexPath, mapping)
	if err != nil {
		log.Fatal(err)
	}
	defer index.Close()

	// Add documents
	documents := []Document{
		{
			ID:      "doc",
			Title:   "Bleve documentation",
			Content: "Bleve provides full-text search capabilities.",
		},
		{
			ID:      "doc1",
			Title:   "Elasticsearch documentation",
			Content: "Elasticsearch provides full-text search capabilities as well.",
		},
	}

	// Iterate and index the documents
	batch := index.NewBatch()
	for _, doc := range documents {
		// Batch.Index returns an error; surface it instead of silently
		// leaving the document out of the batch.
		if err := batch.Index(doc.ID, doc); err != nil {
			log.Fatal(err)
		}
	}
	if err := index.Batch(batch); err != nil {
		log.Fatal(err)
	}

	// Search the created index
	query := bleve.NewQueryStringQuery("bleve")
	searchRequest := bleve.NewSearchRequest(query)
	searchRequest.Explain = true
	// Ask for the stored title and content values to be returned with each hit.
	searchRequest.Fields = []string{"title", "content"}
	searchResult, err := index.Search(searchRequest)
	if err != nil {
		log.Fatal(err)
	}
	fmt.Println(searchResult)
}
```
## Output
```bash
$ go run main.go
1 matches, showing 1 through 1, took 262.333µs
1. doc (0.471405)
title
Bleve documentation
content
Bleve provides full-text search capabilities.
```
## Step-by-Step Breakdown
### 1. Index Creation
```go
// Create a new index mapping
mapping := bleve.NewIndexMapping()
// Create a new index (this creates a directory on disk)
index, err := bleve.New(indexPath, mapping)
```
**What happens:**
- Creates an index mapping with default settings
- Creates a new index directory `example.bleve/`
- Sets up the underlying storage (Scorch engine by default)
### 2. Document Indexing
```go
// Index a document with a unique ID
err := index.Index("doc", map[string]interface{}{
"title": "My Document",
"content": "This is the document content",
"author": "John Doe",
})
```
**What happens:**
- Document gets a unique ID (`doc`)
- Fields are automatically mapped based on their Go types
- Text fields are analyzed (tokenized, lowercased, etc.) based on the mapping chosen (here, the default one)
- Document is stored in the search index
### 3. Searching
```go
// Create a query
query := bleve.NewQueryStringQuery("search terms")
request := bleve.NewSearchRequest(query)
// Execute search
results, err := index.Search(request)
```
**What happens:**
- Query string is parsed and analyzed
- Index is searched for matching documents
- Results are scored by the index's scoring algorithm and ranked by relevance
- Document metadata is returned for each hit, along with stored fields and highlights when the search request asks for them
## Working with Existing Indexes
To open an existing index instead of creating a new one:
```go
// Open existing index
index, err := bleve.Open("example.bleve")
if err != nil {
log.Fatal(err)
}
defer index.Close()
```
## Different Query Types
### 1. Query String Query (Simple)
```go
query := bleve.NewQueryStringQuery("golang programming")
```
### 2. Match Query (Exact Field)
```go
query := bleve.NewMatchQuery("bleve")
query.SetField("title") // Search only in title field
```
### 3. Boolean Query (Complex)
```go
mustQuery := bleve.NewMatchQuery("golang")
shouldQuery := bleve.NewMatchQuery("programming")
boolQuery := bleve.NewBooleanQuery()
boolQuery.AddMust(mustQuery)
boolQuery.AddShould(shouldQuery)
```
### 4. Range Query (Numeric/Date)
```go
minPrice := 20.50
maxPrice := 40.75
query := bleve.NewNumericRangeQuery(&minPrice, &maxPrice)
query.SetField("price")
```
## Advanced Index Configuration
### Custom Field Mapping
```go
// A customised mapping can also be built by hand, for example to choose an analyzer per field
mapping := bleve.NewIndexMapping()
// Text field with custom analyzer
titleMapping := bleve.NewTextFieldMapping()
titleMapping.Analyzer = "en" // English analyzer
// Numeric field
priceMapping := bleve.NewNumericFieldMapping()
// Date field
dateMapping := bleve.NewDateTimeFieldMapping()
// Document mapping
docMapping := bleve.NewDocumentMapping()
docMapping.AddFieldMappingsAt("title", titleMapping)
docMapping.AddFieldMappingsAt("price", priceMapping)
docMapping.AddFieldMappingsAt("created_at", dateMapping)
// Add to index mapping
mapping.AddDocumentMapping("product", docMapping)
// Create index with custom mapping
index, err := bleve.New("products.bleve", mapping)
```
### Batch Operations
For better performance when indexing many documents, we can do indexing in batches:
```go
batch := index.NewBatch()
for _, doc := range documents {
batch.Index(doc.ID, doc)
}
// Execute batch
err := index.Batch(batch)
```
================================================
FILE: docs/geo.md
================================================
# Geo spatial search
Redirect to [geo/README.md](https://github.com/blevesearch/bleve/blob/master/geo/README.md)
================================================
FILE: docs/hierarchy.md
================================================
# Hierarchical nested search
* *v2.6.0* (and after) will come with support for **Array indexing and hierarchical nested search**.
* We've achieved this by embedding nested documents within our bleve (scorch) indexes.
* Usage of zap file format: [v17](https://github.com/blevesearch/zapx/blob/master/zap.md). Here we preserve hierarchical document relationships within segments, continuing to conform to the segmented architecture of *scorch*.
## Supported
* Indexing `Arrays` allows specifying fields that contain arrays of objects. Each object in the array can have its own set of fields, enabling the representation of hierarchical data structures within a single document.
```json
{
"id": "1",
"name": "John Doe",
"addresses": [
{
"type": "home",
"street": "123 Main St",
"city": "Hometown",
"zip": "12345"
},
{
"type": "work",
"street": "456 Corporate Blvd",
"city": "Metropolis",
"zip": "67890"
}
]
}
```
* Multi-level arrays: Arrays can contain objects that themselves have array fields, allowing for deeply nested structures, such as a list of projects, each with its own list of tasks.
```json
{
"id": "2",
"name": "Jane Smith",
"projects": [
{
"name": "Project Alpha",
"tasks": [
{"title": "Task 1", "status": "completed"},
{"title": "Task 2", "status": "in-progress"}
]
},
{
"name": "Project Beta",
"tasks": [
{"title": "Task A", "status": "not-started"},
{"title": "Task B", "status": "completed"}
]
}
]
}
```
* Multiple arrays: A document can have multiple fields that are arrays, each representing different hierarchical data, such as a list of phone numbers and a list of email addresses.
```json
{
"id": "3",
"name": "Alice Johnson",
"phones": [
{"type": "mobile", "number": "555-1234"},
{"type": "home", "number": "555-5678"}
],
"emails": [
{"type": "personal", "address": "alice@example.com"},
{"type": "work", "address": "alice@work.com"}
]
}
```
* Hybrid arrays: Multi-level and multiple arrays can be combined within the same document to represent complex hierarchical data structures, such as a company with multiple departments, each having its own list of employees and projects.
```json
{
"id": "doc1",
"company": {
"id": "c1",
"name": "TechCorp",
"departments": [
{
"name": "Engineering",
"budget": 2000000,
"employees": [
{"name": "Alice", "role": "Engineer"},
{"name": "Bob", "role": "Manager"}
],
"projects": [
{"title": "Project X", "status": "ongoing"},
{"title": "Project Y", "status": "completed"}
]
},
{
"name": "Sales",
"budget": 300000,
"employees": [
{"name": "Eve", "role": "Salesperson"},
{"name": "Mallory", "role": "Manager"}
],
"projects": [
{"title": "Project A", "status": "completed"},
{"title": "Project B", "status": "ongoing"}
]
}
],
"locations": [
{"city": "Athens","country": "Greece"},
{"city": "Berlin","country": "USA"}
]
}
}
```
* Earlier versions of Bleve only supported flat arrays of primitive types (e.g., strings, numbers), and would flatten nested structures, losing the hierarchical relationships, so the above complex documents could not be accurately represented or queried. For example, the "employees" and "projects" fields within each department would be flattened, making it impossible to associate employees with their respective departments.
* From v2.6.0 onwards, Bleve allows for accurate representation and querying of complex nested structures, preserving the relationships between different levels of the hierarchy, across multi-level, multiple and hybrid arrays.
* The addition of `nested` document mappings enables defining fields that contain arrays of objects, giving the option to preserve the hierarchical relationships within the array during indexing. Leaving `nested` as false (the default) will flatten the objects within the array, losing the hierarchy, which was the earlier behavior.
```json
{
"departments": {
"dynamic": false,
"enabled": true,
"nested": true,
"properties": {
"employees": {
"dynamic": false,
"enabled": true,
"nested": true
},
"projects": {
"dynamic": false,
"enabled": true,
"nested": true
}
}
},
"locations": {
"dynamic": false,
"enabled": true,
"nested": true
}
}
```
* Any Bleve query (e.g., `match`, `phrase`, `term`, `fuzzy`, `numeric/date range` etc.) can be executed against fields within nested documents, with no special handling required. The query processor will automatically traverse the nested structures to find matches. Additional search constructs
like vector search, synonym search, hybrid and pre-filtered vector search integrate seamlessly with hierarchy search.
* Conjunction Queries (AND queries) and other queries that depend on term co-occurrence within the same hierarchical context will respect the boundaries of nested documents. This means that terms must appear within the same nested object to be considered a match. For example, a conjunction query searching for an employee named "Alice" with the role "Engineer" within the "Engineering" department will only return results where both name and role terms are found within the same employee object, which is itself within an "Engineering" department object.
* Some other search constructs will have enhanced precision with hierarchy search.
* Field-Level Highlighting: Only fields within the matched nested object are retrieved and highlighted, ensuring highlights appear in the correct hierarchical context. For example, a match in `departments[name=Engineering].employees` highlights only employees in that department.
* Nested Faceting / Aggregations: Facets are computed within matched nested objects, producing context-aware buckets. E.g., a facet on `departments.projects.status` returns ongoing or completed only for projects in matched departments.
* Sorting by Nested Fields: Sorting can use fields from the relevant nested object, e.g., ordering companies by `departments.budget` sorts based on the budget of the specific matched department, not unrelated departments.
* Vector Search (KNN / Multi-KNN): When a document contains an array of objects with vector/multi-vector fields, the final document score and ranking are identical whether or not the array is marked as `nested`. In both cases, the highest-scoring vector is selected; either directly from the array (non-nested) or from the best-matching nested object with its score bubbled up to the parent document.
* Pre-Filtered Vector Search: When vector search is combined with filters on fields inside a nested array, the filters are applied first to pick which nested items are eligible. Vector similarity is then computed only on the vector fields of those filtered nested objects. For example, if `departments.employees` is a `nested` array, a pre-filtered KNN query for employees with a `skills_vector` matching `machine learning engineer`, a role of `Manager`, and belonging to the `Sales` department will first narrow the candidate set to only employees who meet the requirement, and then compute vector similarity on the `skills_vector` of that filtered subset. This ensures that vector search results come only from the employees that satisfy the filter, and not from unrelated employees in other departments.
## Indexing
Below is an example of using the Bleve API to index documents with hierarchical structures, using hybrid arrays and nested mappings.
```go
// Define a document to be indexed.
docJSON :=
`{
"company": {
"id": "c3",
"name": "WebSolutions",
"departments": [
{
"name": "HR",
"budget": 800000,
"employees": [
{"name": "Eve", "role": "Manager"},
{"name": "Frank", "role": "HR"}
],
"projects": [
{"title": "Project Beta", "status": "completed"},
{"title": "Project B", "status": "ongoing"}
]
},
{
"name": "Engineering",
"budget": 200000,
"employees": [
{"name": "Heidi", "role": "Support Engineer"},
{"name": "Ivan", "role": "Manager"}
],
"projects": [
{"title": "Project Helpdesk", "status": "ongoing"},
{"title": "Project FAQ", "status": "completed"}
]
}
],
"locations": [
{"city": "Edinburgh", "country": "UK"},
{"city": "London", "country": "Canada"}
]
}
}`
// Define departments as a nested document mapping (since it contains arrays of objects)
// and index name and budget fields
departmentsMapping := bleve.NewNestedDocumentMapping()
departmentsMapping.AddFieldMappingsAt("name", bleve.NewTextFieldMapping())
departmentsMapping.AddFieldMappingsAt("budget", bleve.NewNumericFieldMapping())
// Define employees as a nested document mapping within departments (since it contains arrays of objects)
// and index name and role fields
employeesMapping := bleve.NewNestedDocumentMapping()
employeesMapping.AddFieldMappingsAt("name", bleve.NewTextFieldMapping())
employeesMapping.AddFieldMappingsAt("role", bleve.NewTextFieldMapping())
departmentsMapping.AddSubDocumentMapping("employees", employeesMapping)
// Define projects as a nested document mapping within departments (since it contains arrays of objects)
// and index title and status fields
projectsMapping := bleve.NewNestedDocumentMapping()
projectsMapping.AddFieldMappingsAt("title", bleve.NewTextFieldMapping())
projectsMapping.AddFieldMappingsAt("status", bleve.NewTextFieldMapping())
departmentsMapping.AddSubDocumentMapping("projects", projectsMapping)
// Define locations as a nested document mapping (since it contains arrays of objects)
// and index city and country fields
locationsMapping := bleve.NewNestedDocumentMapping()
locationsMapping.AddFieldMappingsAt("city", bleve.NewTextFieldMapping())
locationsMapping.AddFieldMappingsAt("country", bleve.NewTextFieldMapping())
// Define company as a document mapping and index its name field and
// add departments and locations as sub-document mappings
companyMapping := bleve.NewDocumentMapping()
companyMapping.AddFieldMappingsAt("name", bleve.NewTextFieldMapping())
companyMapping.AddSubDocumentMapping("departments", departmentsMapping)
companyMapping.AddSubDocumentMapping("locations", locationsMapping)
// Define the final index mapping and add company as a sub-document mapping in the default mapping
indexMapping := bleve.NewIndexMapping()
indexMapping.DefaultMapping.AddSubDocumentMapping("company", companyMapping)
// Create the index with the defined mapping
index, err := bleve.New("hierarchy_example.bleve", indexMapping)
if err != nil {
panic(err)
}
// Unmarshal the document JSON into a map, for indexing
var doc map[string]interface{}
err = json.Unmarshal([]byte(docJSON), &doc)
if err != nil {
panic(err)
}
// Index the document
err = index.Index("doc1", doc)
if err != nil {
panic(err)
}
```
## Querying
```go
// Open the index
index, err := bleve.Open("hierarchy_example.bleve")
if err != nil {
panic(err)
}
var (
req *bleve.SearchRequest
res *bleve.SearchResult
)
// Example 1: Simple Match Query on a field within a nested document, should work as if it were a flat field
q1 := bleve.NewMatchQuery("Engineer")
q1.SetField("company.departments.employees.role")
req = bleve.NewSearchRequest(q1)
res, err = index.Search(req)
if err != nil {
panic(err)
}
fmt.Println("Match Query Results:", res)
// Example 2: Conjunction Query (AND) on fields within the same nested document
// like finding employees with name "Eve" and role "Manager". This will only match
// if both terms are in the same employee object.
q1 = bleve.NewMatchQuery("Eve")
q1.SetField("company.departments.employees.name")
q2 := bleve.NewMatchQuery("Manager")
q2.SetField("company.departments.employees.role")
conjQuery := bleve.NewConjunctionQuery(
q1,
q2,
)
req = bleve.NewSearchRequest(conjQuery)
res, err = index.Search(req)
if err != nil {
panic(err)
}
fmt.Println("Conjunction Query Results:", res)
// Example 3: Multi-level Nested Query, finding projects with status "ongoing"
// within the "Engineering" department. This ensures both conditions are met
// within the correct hierarchy, i.e., the ongoing project must belong to the
// Engineering department.
q1 = bleve.NewMatchQuery("Engineering")
q1.SetField("company.departments.name")
q2 = bleve.NewMatchQuery("ongoing")
q2.SetField("company.departments.projects.status")
multiLevelQuery := bleve.NewConjunctionQuery(
q1,
q2,
)
req = bleve.NewSearchRequest(multiLevelQuery)
res, err = index.Search(req)
if err != nil {
panic(err)
}
fmt.Println("Multi-level Nested Query Results:", res)
// Example 4: Multiple Arrays Query, finding documents with a location in "London"
// and an employee with the role "Manager". This checks conditions across different arrays.
q1 = bleve.NewMatchQuery("London")
q1.SetField("company.locations.city")
q2 = bleve.NewMatchQuery("Manager")
q2.SetField("company.departments.employees.role")
multiArrayQuery := bleve.NewConjunctionQuery(
q1,
q2,
)
req = bleve.NewSearchRequest(multiArrayQuery)
res, err = index.Search(req)
if err != nil {
panic(err)
}
fmt.Println("Multiple Arrays Query Results:", res)
// Example 5: Hybrid Arrays Query, combining multi-level and multiple arrays,
// finding documents with a Manager named Ivan working in Edinburgh, UK
q1 = bleve.NewMatchQuery("Ivan")
q1.SetField("company.departments.employees.name")
q2 = bleve.NewMatchQuery("Manager")
q2.SetField("company.departments.employees.role")
q3 := bleve.NewMatchQuery("Edinburgh")
q3.SetField("company.locations.city")
q4 := bleve.NewMatchQuery("UK")
q4.SetField("company.locations.country")
hybridArrayQuery := bleve.NewConjunctionQuery(
bleve.NewConjunctionQuery(
q1,
q2,
),
bleve.NewConjunctionQuery(
q3,
q4,
),
)
req = bleve.NewSearchRequest(hybridArrayQuery)
res, err = index.Search(req)
if err != nil {
panic(err)
}
fmt.Println("Hybrid Arrays Query Results:", res)
// Close the index when done
err = index.Close()
if err != nil {
panic(err)
}
```
================================================
FILE: docs/index_update.md
================================================
# Ability to reduce downtime during index mapping updates
* *v2.5.4* (and after) will come with support to delete or modify any field mapping in the index mapping without requiring a full rebuild of the index
* We do this by storing which portions of the field have to be deleted within zap and then lazily executing the deletion during subsequent merging of the segments
## Usage
While opening an index, if an updated mapping is provided as a string under the key `updated_mapping` within the `runtimeConfig` parameter of `OpenUsing`, then we open the index and try to update it to use the new mapping provided.
If the update fails, the index is unchanged and an error is returned explaining why the update was unsuccessful.
## What can be deleted and what can't be deleted?
Fields can be partially deleted by changing their Index, Store, and DocValues parameters from true to false, or completely removed by deleting the field itself.
Additionally, document mappings can be deleted either by fully removing them from the index mapping or by setting the Enabled value to false, which deletes all fields defined within that mapping.
However, if any of the following conditions are met, the index is considered non-updatable.
* Any additional fields or enabled document mappings in the new index mapping
* Any changes to IncludeInAll, type, IncludeTermVectors and SkipFreqNorm
* Any document mapping having its enabled value changing from false to true
* Text fields with a different analyser or date time fields with a different date time format
* Vector and VectorBase64 fields changing dims, similarity or vectorIndexOptimizedFor
* Any changes when field is part of `_all`
* Full field deletions when it is covered by any dynamic setting (Index, Store or DocValues Dynamic)
* Any changes to dynamic settings at the top level or any enabled document mapping
* If multiple fields sharing the same field name either from different type mappings or aliases are present, then any non compatible changes across all of these fields
## How to enforce immediate deletion?
Since the deletion is only done during merging, a [force merge](https://github.com/blevesearch/bleve/blob/b82baf10b205511cf12da5cb24330abd9f5b1b74/index/scorch/merge.go#L164) may be used to completely remove the stale data.
## Sample code to update an existing index
```go
newMapping := ``
config := map[string]interface{}{
"updated_mapping": newMapping,
}
index, err := bleve.OpenUsing("", config)
if err != nil {
return err
}
```
================================================
FILE: docs/pagination.md
================================================
# Pagination
## Why pagination matters
Search queries can match many documents. Pagination lets you fetch and display results in chunks, keeping responses small and fast.
By default, Bleve returns the first 10 hits sorted by relevance (score), highest first.
## Two pagination modes
- `From`/`Size`: simple and stateless; cost grows with page depth.
- `SearchAfter`/`SearchBefore`: efficient for deep paging; requires passing sort keys from the previous page.
Both modes can be used with any valid sort.
## `Size`/`From`
Offset-based pagination uses `Size` (page length) and `From` (number of hits to skip). Bleve collects at least `Size + From` ordered results, then returns the `Size` slice starting at `From`.
JSON example:
```json
{
"query": { "match": "California" },
"sort": ["-_score"],
"size": 5,
"from": 10
}
```
The result would be 5 hits starting from the 11th hit (the first 10 hits are skipped).
When to use:
- Simple, stateless pagination for shallow pages.
- Avoid for deep pages, as memory grows with `From` for deeper pages.
## `SearchAfter` and `SearchBefore`
This returns the next (or previous) page based on a boundary defined by the sort keys of a specific hit. This keeps resource usage proportional to the page size, even for deep pages.
Rules:
- Use either `SearchAfter` (forward) or `SearchBefore` (backward), not both at once.
- The length of `SearchAfter`/`SearchBefore` must match the length of `Sort`.
- Values are strings representing the sort keys, in the same order as `Sort`.
- Keep the same `query` and `sort` across pages for consistent navigation.
Where do sort keys come from?
- Each hit includes `Sort` (and `DecodedSort` from Bleve v2.5.2). Take the last hit's sort keys for `SearchAfter`, or the first hit's sort keys for `SearchBefore`.
- If the field(s) being sorted on are numeric, datetime or geo, the values in the `Sort` field may appear garbled; this is because of how Bleve represents such data types internally. To use such fields as sort keys, use the `DecodedSort` field, which decodes the internal representations. This feature is available from Bleve v2.5.4.
> When using `DecodedSort`, the `Sort` array in the search request needs to explicitly declare the type of the field for proper decoding. Hence, the `Sort` array must contain either `SortField` objects (for numeric and datetime) or `SortGeoDistance` objects (for geo) rather than just the field names. More info on `SortField` and `SortGeoDistance` can be found in [sort_facet.md](sort_facet.md).
Forward pagination over `_id` and `_score`:
```json
{
"query": { "match": "California" },
"sort": ["_id", "_score"],
"search_after": ["hotel_10180", "0.998"],
"size": 3
}
```
Backward pagination over `_id` and `_score`:
```json
{
"query": { "match": "California" },
"sort": ["_id", "_score"],
"search_before": ["hotel_17595", "0.623"],
"size": 4
}
```
Pagination using numeric, datetime and geo fields. Notice how we specify the sort objects, with the "type" field explicitly declared in case of numeric and datetime:
```json
{
"query": {
"match_all": {}
},
"size": 10,
"sort": [
{"by": "field", "field": "price", "type": "number"},
{"by": "field", "field": "created_at", "type": "date"},
{"by": "geo_distance", "field": "location", "location": {"lat": 40.7128,"lon": -74.0060}}
],
"search_after": ["99.99", "2023-10-15T10:30:00Z", "5.2"]
}
```
## Total Sort Order
Pagination is deterministic. Ensure your `Sort` defines a total order, so that documents with the same sort keys are not left out:
- Sort strings can be field names (prefix with `-` for descending), `"_score"`, or `"_id"`.
- Always include a stable tie-breaker as the last key, typically `"_id"`.
- Examples:
- `["country", "-age", "_id"]`
- `["-_score", "_id"]` (default score desc with a tie-breaker)
## Performance guidance
- Offset pagination cost grows with `From` (collects at least `Size + From` results before slicing).
- `SearchAfter`/`SearchBefore` keeps memory and network proportional to `Size`.
- For large datasets and deep navigation, prefer using `SearchAfter` and `SearchBefore`.
================================================
FILE: docs/persister.md
================================================
# Scorch Index Memory and File Management
## Memory Management
When data is indexed in Scorch — using either the `index.Index()` or `index.Batch()` API — it is added as part of an in-memory "segment". Memory management in Scorch indexing mainly relates to handling these in-memory segments during workloads that involve inserts or updates.
In scenarios with a continuous stream of incoming data, a large number of in-memory segments can accumulate over time. This is where the persister component comes into play—its job is to flush these in-memory segments to disk.
Starting with v2.5.0, Scorch supports parallel flushing of in-memory segments to disk, where the persister checks the total in-memory data and distributes the flush across multiple workers. This feature is disabled by default and can be enabled using two configuration options:
- `NumPersisterWorkers`: This factor decides how many maximum workers can be spawned to flush out the in-memory segments. Each worker will work on a disjoint subset of segments, merge them, and flush them out to the disk. By default the persister deploys only one worker.
- `MaxSizeInMemoryMergePerWorker`: This config decides what's the maximum amount of input data in bytes a single worker can work upon. By default this value is equal to 0 which means that this config is disabled and the worker tries to merge all the data in one shot. Also note that it's imperative that the user set this config if `NumPersisterWorkers > 1`.
If the index is tuned to have a higher `NumPersisterWorkers` value, the memory can potentially drain out faster and ensure stronger consistency behaviour — but there would be a lot of on-disk files, and the background merger would experience the pressure of managing this large number of files, which can be resource-intensive.
- Tuning this config is very dependent on the available CPU resources, and something to keep in mind here is that the process's RSS can increase if the number of workers — and each of them working upon a large amount of data — is high.
Increasing the `MaxSizeInMemoryMergePerWorker` value would mean that each worker acts upon a larger amount of data and spends more time merging and flushing it out to disk — which can be healthy behaviour in terms of I/O, although it comes at the cost of time.
- Changing this config is use-case dependent; for example, in use cases where the payload or per-document size is generally large (e.g., vector use cases), it would be beneficial to have a larger value for this.
So, having the ideal values for these two configs is definitely dependent on the use case and can involve a bunch of experiments, keeping the resource usage in mind.
## File Management
The persister introducing some number of file segments into the system would change the state of the system, and the merger would wake up and try to manage these on-disk files.
Management of these files is crucial when it comes to query latency because a higher number of files would dictate searching through a larger number of files and also higher read amplification to some extent, because the backing data structures can potentially be compacted in size across files.
The merger sees the files on disk and plans out which segments to merge so that the final layout of segment tiers (each tier having multiple files), which grow in a logarithmic way (the chances of larger tiers growing in number would decrease), is maintained. This also implies that deciding this first-tier size becomes important in deciding the number of segment files across all tiers.
Starting with v2.5.0, this first-tier size is dependent on the file size using the `FloorSegmentFileSize` config, because that's a better metric to consider (unlike the legacy live doc count metric) in order to ensure that the behaviour is in line with the use case and aware of the payload/doc size.
- This config can also be tuned to dictate how the I/O behaviour should be within an index. While tuning this config, it should be in proportion to the `MaxSizeInMemoryMergePerWorker` since that dictates the amount of data flushed out per flush.
- The observation here is that `FloorSegmentFileSize` is less than `MaxSizeInMemoryMergePerWorker`, and for optimal I/O during indexing, this value can be set close to `MaxSizeInMemoryMergePerWorker/6`.
## Setting a Persister/Merger Config in Index
The configs are set via the `kvConfig` parameter in the `NewUsing()` or `OpenUsing()` API:
```go
// setting the persister and merger configs
kvConfig := map[string]interface{}{
"scorchPersisterOptions": map[string]interface{}{
"NumPersisterWorkers": 4,
"MaxSizeInMemoryMergePerWorker": 20000000,
},
"scorchMergePlanOptions": map[string]interface{}{
"FloorSegmentFileSize": 10000000,
},
}
// passing the config to the index
index, err := bleve.NewUsing("example.bleve", bleve.NewIndexMapping(), bleve.Config.DefaultIndexType, bleve.Config.DefaultMemKVStore, kvConfig)
if err != nil {
panic(err)
}
```
================================================
FILE: docs/query-openapi-spec.yaml
================================================
openapi: 3.0.3
info:
title: Bleve JSON Query Language
description: |
A comprehensive specification for the Bleve Query JSON Language.
Bleve is a text indexing library for Go that provides full-text search capabilities.
This specification defines the JSON structure for all supported query types in Bleve.
## Query Types Supported:
### Basic Queries
- **Term Query**: Exact term matching
- **Match Query**: Full-text search with analysis
- **Match Phrase Query**: Phrase matching with proximity
- **Prefix Query**: Prefix matching
- **Wildcard Query**: Pattern matching with * and ?
- **Regexp Query**: Regular expression matching
- **Fuzzy Query**: Fuzzy matching with edit distance
- **Query String Query**: Human-readable query syntax
### Range Queries
- **Numeric Range Query**: Numeric value ranges
- **Date Range Query**: Date/time ranges
- **Term Range Query**: Lexicographic term ranges
### Boolean Queries
- **Boolean Query**: Must/should/must_not combinations
- **Conjunction Query**: All queries must match
- **Disjunction Query**: Any query can match
### Special Queries
- **Match All Query**: Matches all documents
- **Match None Query**: Matches no documents
- **Bool Field Query**: Boolean field matching
- **Doc ID Query**: Document ID matching
### Geographic Queries
- **Geo Distance Query**: Distance-based geographic search
- **Geo Bounding Box Query**: Bounding box geographic search
- **Geo Bounding Polygon Query**: Polygon geographic search
- **Geo Shape Query**: Complex geographic shape matching
### Vector Queries
- **KNN Query**: K-nearest neighbors vector search
### Network Queries
- **IP Range Query**: IP address range matching
## Common Properties
All queries support:
- **boost**: Query boost value (float, default 1.0)
- **field**: Field to search (string, optional)
## Query Parsing
Bleve automatically detects query types based on the presence of specific JSON fields.
The query parser uses a heuristic approach to determine the query type from the JSON structure.
version: 2.0.0
contact:
name: Bleve Search
url: https://github.com/blevesearch/bleve
license:
name: Apache 2.0
url: https://www.apache.org/licenses/LICENSE-2.0
tags:
- name: Basic Queries
description: Basic text search queries
- name: Range Queries
description: Range-based queries for numeric, date, and term values
- name: Boolean Queries
description: Compound queries combining multiple sub-queries
- name: Special Queries
description: Special purpose queries like match all/none
- name: Geographic Queries
description: Geographic and spatial search queries
- name: Vector Queries
description: Vector similarity and KNN queries
- name: Network Queries
description: Network and IP address queries
components:
schemas:
# Base Query Schema
Query:
type: object
description: Base query object - all queries extend this
properties:
boost:
type: number
format: float
description: Query boost value
default: 1.0
minimum: 0
field:
type: string
description: Field to search (optional, uses default field if not specified)
additionalProperties: true
# Basic Queries
TermQuery:
allOf:
- type: object
properties:
term:
type: string
description: Exact term to search for
example: "search"
required:
- term
example:
term: "search"
field: "title"
boost: 2.0
MatchQuery:
allOf:
- type: object
properties:
match:
type: string
description: Text to match (will be analyzed)
example: "full text search"
operator:
type: string
enum: [and, or]
description: Boolean operator for multiple terms
default: "or"
fuzziness:
oneOf:
- type: integer
minimum: 0
maximum: 2
description: Edit distance for fuzzy matching
- type: string
enum: ["auto"]
description: Automatic fuzziness
default: 0
prefix_length:
type: integer
description: Prefix length for fuzzy matching
minimum: 0
default: 0
analyzer:
type: string
description: Analyzer to use (optional)
required:
- match
example:
match: "full text search"
field: "content"
operator: "and"
fuzziness: 1
MatchPhraseQuery:
allOf:
- type: object
properties:
match_phrase:
type: string
description: Phrase to match exactly
example: "exact phrase match"
fuzziness:
oneOf:
- type: integer
minimum: 0
maximum: 2
- type: string
enum: ["auto"]
default: 0
analyzer:
type: string
description: Analyzer to use (optional)
required:
- match_phrase
example:
match_phrase: "exact phrase match"
field: "content"
fuzziness: "auto"
PrefixQuery:
allOf:
- type: object
properties:
prefix:
type: string
description: Prefix to match
example: "pref"
required:
- prefix
example:
prefix: "pref"
field: "title"
WildcardQuery:
allOf:
- type: object
properties:
wildcard:
type: string
description: Wildcard pattern (* matches any sequence, ? matches single character)
example: "te*t"
required:
- wildcard
example:
wildcard: "te*t"
field: "title"
RegexpQuery:
allOf:
- type: object
properties:
regexp:
type: string
description: Regular expression pattern (should not include ^ or $)
example: "te[st]"
required:
- regexp
example:
regexp: "te[st]"
field: "title"
FuzzyQuery:
allOf:
- type: object
properties:
term:
type: string
description: Term for fuzzy matching
example: "search"
fuzziness:
oneOf:
- type: integer
minimum: 0
maximum: 2
- type: string
enum: ["auto"]
default: 1
prefix_length:
type: integer
description: Prefix length for fuzzy matching
minimum: 0
default: 0
required:
- term
example:
term: "search"
field: "title"
fuzziness: "auto"
QueryStringQuery:
allOf:
- type: object
properties:
query:
type: string
description: Query string in human-readable syntax
example: "title:search AND content:full text"
required:
- query
example:
query: "title:search AND content:full text"
# Range Queries
NumericRangeQuery:
allOf:
- type: object
properties:
min:
type: number
format: float
description: Minimum value (inclusive by default)
max:
type: number
format: float
description: Maximum value (exclusive by default)
inclusive_min:
type: boolean
description: Whether minimum is inclusive
default: true
inclusive_max:
type: boolean
description: Whether maximum is inclusive
default: false
example:
min: 10.5
max: 100.0
inclusive_min: true
inclusive_max: false
field: "price"
DateRangeQuery:
allOf:
- type: object
properties:
start:
type: string
format: date-time
description: Start date/time
end:
type: string
format: date-time
description: End date/time
inclusive_start:
type: boolean
description: Whether start is inclusive
default: true
inclusive_end:
type: boolean
description: Whether end is inclusive
default: false
example:
start: "2023-01-01T00:00:00Z"
end: "2023-12-31T23:59:59Z"
inclusive_start: true
inclusive_end: true
field: "created_at"
TermRangeQuery:
allOf:
- type: object
properties:
min:
type: string
description: Minimum term (lexicographic)
max:
type: string
description: Maximum term (lexicographic)
inclusive_min:
type: boolean
description: Whether minimum is inclusive
default: true
inclusive_max:
type: boolean
description: Whether maximum is inclusive
default: false
example:
min: "a"
max: "m"
inclusive_min: true
inclusive_max: false
field: "name"
# Boolean Queries
BooleanQuery:
allOf:
- type: object
properties:
must:
oneOf:
- type: array
items:
$ref: '#/components/schemas/Query'
- $ref: '#/components/schemas/Query'
description: Queries that must match (can be single query or array)
should:
oneOf:
- type: array
items:
$ref: '#/components/schemas/Query'
- $ref: '#/components/schemas/Query'
description: Queries that should match (boost score, can be single query or array)
must_not:
oneOf:
- type: array
items:
$ref: '#/components/schemas/Query'
- $ref: '#/components/schemas/Query'
description: Queries that must not match (can be single query or array)
filter:
oneOf:
- type: array
items:
$ref: '#/components/schemas/Query'
- $ref: '#/components/schemas/Query'
description: Queries that filter results (no score impact, can be single query or array)
min_should:
type: integer
description: Minimum number of should queries that must match
minimum: 0
example:
must:
- term: "important"
field: "status"
- match: "urgent"
field: "title"
should:
- match: "critical"
field: "content"
- match: "priority"
field: "tags"
must_not:
- term: "archived"
field: "status"
filter:
- term: "published"
field: "status"
boost: 1.5
ConjunctionQuery:
allOf:
- type: object
properties:
conjuncts:
type: array
items:
$ref: '#/components/schemas/Query'
description: Array of queries that must all match
minItems: 1
required:
- conjuncts
example:
conjuncts:
- term: "search"
field: "title"
- match: "full text"
field: "content"
DisjunctionQuery:
allOf:
- type: object
properties:
disjuncts:
type: array
items:
$ref: '#/components/schemas/Query'
description: Array of queries where at least one must match
minItems: 1
min:
type: number
format: float
description: Minimum number of disjuncts that must match
default: 1
minimum: 0
required:
- disjuncts
example:
disjuncts:
- term: "urgent"
field: "title"
- term: "critical"
field: "content"
min: 1
# Special Queries
MatchAllQuery:
allOf:
- type: object
properties:
match_all:
type: object
description: Empty object indicating match all
example:
match_all: {}
boost: 1.0
MatchNoneQuery:
allOf:
- type: object
properties:
match_none:
type: object
description: Empty object indicating match none
example:
match_none: {}
BoolFieldQuery:
allOf:
- type: object
properties:
bool:
type: boolean
description: Boolean value to match
required:
- bool
example:
bool: true
field: "published"
DocIDQuery:
allOf:
- type: object
properties:
ids:
type: array
items:
type: string
description: Array of document IDs to match
minItems: 1
required:
- ids
example:
ids: ["doc1", "doc2", "doc3"]
# Geographic Queries
GeoDistanceQuery:
allOf:
- type: object
properties:
location:
oneOf:
- type: array
items:
type: number
format: float
minItems: 2
maxItems: 2
description: "[longitude, latitude]"
- type: object
properties:
lon:
type: number
format: float
lat:
type: number
format: float
distance:
type: string
description: Distance string (e.g., "10km", "5mi")
required:
- location
- distance
example:
location: [-122.4194, 37.7749]
distance: "10km"
field: "location"
GeoBoundingBoxQuery:
allOf:
- type: object
properties:
top_left:
oneOf:
- type: array
items:
type: number
format: float
minItems: 2
maxItems: 2
- type: object
properties:
lon:
type: number
format: float
lat:
type: number
format: float
bottom_right:
oneOf:
- type: array
items:
type: number
format: float
minItems: 2
maxItems: 2
- type: object
properties:
lon:
type: number
format: float
lat:
type: number
format: float
required:
- top_left
- bottom_right
example:
top_left: [-122.5, 37.8]
bottom_right: [-122.3, 37.7]
field: "location"
GeoBoundingPolygonQuery:
allOf:
- type: object
properties:
polygon_points:
type: array
items:
oneOf:
- type: array
items:
type: number
format: float
minItems: 2
maxItems: 2
- type: object
properties:
lon:
type: number
format: float
lat:
type: number
format: float
description: Array of polygon points [lon, lat]
minItems: 3
required:
- polygon_points
example:
polygon_points:
- [-122.5, 37.8]
- [-122.3, 37.8]
- [-122.3, 37.7]
- [-122.5, 37.7]
field: "location"
GeoShapeQuery:
allOf:
- type: object
properties:
geometry:
type: object
description: GeoJSON geometry object
properties:
type:
type: string
enum: [Point, LineString, Polygon, MultiPoint, MultiLineString, MultiPolygon, GeometryCollection]
coordinates:
description: Geometry coordinates (format depends on geometry type)
oneOf:
- type: array
description: Coordinates for Point, LineString, MultiPoint, etc.
- type: array
items:
type: array
description: Nested coordinates for Polygon, MultiLineString, etc.
- type: array
items:
type: array
items:
type: array
description: Deeply nested coordinates for MultiPolygon, etc.
required:
- type
- coordinates
required:
- geometry
example:
geometry:
type: "Polygon"
coordinates:
- [[-122.5, 37.8], [-122.3, 37.8], [-122.3, 37.7], [-122.5, 37.7], [-122.5, 37.8]]
field: "location"
# Vector Queries
KNNQuery:
allOf:
- type: object
properties:
field:
type: string
description: Vector field name
vector:
type: array
items:
type: number
format: float
description: Query vector
minItems: 1
k:
type: integer
format: int64
description: Number of nearest neighbors to return
minimum: 1
maximum: 10000
params:
type: object
description: Additional parameters for vector search
required:
- field
- vector
- k
example:
field: "embedding"
vector: [0.1, 0.2, 0.3, 0.4, 0.5]
k: 10
boost: 1.0
# Network Queries
IPRangeQuery:
allOf:
- type: object
properties:
cidr:
type: string
description: CIDR notation for IP range (e.g., "192.168.1.0/24")
required:
- cidr
example:
cidr: "192.168.1.0/24"
field: "ip_address"
# Phrase Query (internal use)
PhraseQuery:
allOf:
- type: object
properties:
terms:
type: array
items:
type: string
description: Array of terms in phrase order
required:
- terms
example:
terms: ["exact", "phrase", "match"]
field: "content"
MultiPhraseQuery:
allOf:
- type: object
properties:
terms:
type: array
items:
type: array
items:
type: string
description: Array of term arrays for multi-phrase matching
required:
- terms
example:
terms:
- ["exact", "precise"]
- ["phrase"]
- ["match", "search"]
examples:
# Basic Query Examples
simple_term:
summary: Simple Term Query
value:
term: "search"
field: "title"
boost: 2.0
simple_match:
summary: Simple Match Query
value:
match: "full text search"
field: "content"
operator: "and"
fuzzy_search:
summary: Fuzzy Search
value:
term: "search"
field: "title"
fuzziness: "auto"
prefix_length: 2
# Boolean Query Examples
complex_boolean:
summary: Complex Boolean Query
value:
must:
- term: "important"
field: "status"
- match: "urgent"
field: "title"
should:
- match: "critical"
field: "content"
- match: "priority"
field: "tags"
must_not:
- term: "archived"
field: "status"
filter:
- term: "published"
field: "status"
boost: 1.5
# Range Query Examples
price_range:
summary: Price Range Query
value:
min: 10.0
max: 100.0
inclusive_min: true
inclusive_max: false
field: "price"
date_range:
summary: Date Range Query
value:
start: "2023-01-01T00:00:00Z"
end: "2023-12-31T23:59:59Z"
inclusive_start: true
inclusive_end: true
field: "created_at"
# Geographic Query Examples
geo_distance:
summary: Geographic Distance Query
value:
location: [-122.4194, 37.7749]
distance: "10km"
field: "location"
geo_bounding_box:
summary: Geographic Bounding Box Query
value:
top_left: [-122.5, 37.8]
bottom_right: [-122.3, 37.7]
field: "location"
# Vector Query Examples
knn_search:
summary: K-Nearest Neighbors Query
value:
field: "embedding"
vector: [0.1, 0.2, 0.3, 0.4, 0.5]
k: 10
boost: 1.0
# Query String Examples
query_string:
summary: Query String Query
value:
query: "title:search AND (content:full text OR content:search) AND -status:archived"
responses:
ValidationSuccess:
description: Query validation successful
content:
application/json:
schema:
type: object
properties:
valid:
type: boolean
example: true
query_type:
type: string
example: "term"
message:
type: string
example: "Query is valid"
ValidationError:
description: Query validation failed
content:
application/json:
schema:
type: object
properties:
valid:
type: boolean
example: false
error:
type: string
example: "unknown query type"
details:
type: string
example: "No recognized query fields found"
externalDocs:
description: Bleve Documentation
url: https://github.com/blevesearch/bleve
================================================
FILE: docs/score_fusion.md
================================================
# Score Fusion for Hybrid Search
Bleve supports **hybrid search** that combines full-text search (FTS) with vector (kNN) search to leverage the strengths of both approaches:
* **Full-text search** excels at exact keyword matching, filtering, and structured queries
* **Vector search** captures semantic similarity and handles synonyms and paraphrasing naturally
With *v2.5.4* onwards - when using hybrid search, you can choose different **score fusion strategies** to combine results from both search methods. This document describes the available fusion strategies and how to use them.
## Fusion Strategies
### Additive Score Fusion (Default)
By default, Bleve combines FTS and kNN scores using a simple weighted addition. See the [Vector Search documentation](vectors.md#querying) for details on the default hybrid search behavior and examples.
While this approach works well with proper boost tuning, it can be sensitive to different score scales and distributions. The fusion strategies below (RRF and RSF) provide more robust alternatives that handle score normalization automatically.
### Reciprocal Rank Fusion (RRF)
Reciprocal Rank Fusion is a **rank-based** algorithm that combines results based on their position in each result list, rather than their raw scores. This makes it robust to different score scales and distributions.
**Algorithm:**
For each document appearing in FTS or kNN results, the RRF score is calculated as:
```math
RRF\_score = w_{\text{fts}} \cdot \frac{1}{k + \text{rank}_{\text{fts}}} + \sum_{i=1}^{n} w_{\text{knn}_i} \cdot \frac{1}{k + \text{rank}_{\text{knn}_i}}
```
Where:
* $\text{rank}_{\text{fts}}$: 1-indexed rank of the document in the FTS result list (or 0 if not present)
* $\text{rank}_{\text{knn}_i}$: 1-indexed rank of the document in the i-th kNN result list (or 0 if not present)
* $k$: rank constant (default: 60) that dampens the impact of rank differences
* $w_{\text{fts}}$: weight from the FTS query boost value
* $w_{\text{knn}_i}$: weight from the i-th kNN query boost value
* $\sum_{i=1}^{n}$: summation over all kNN queries (you can add multiple kNN queries)
**Advantages:**
* Distribution-agnostic - no need for score normalization
* Works out of the box with minimal tuning
* Prioritizes documents appearing in both result lists
* Robust to outliers since only ranks matter
**Disadvantages:**
* Ignores score magnitude (loses some information)
* May be sensitive to imbalanced result list sizes
**Usage:**
```go
// Create a hybrid search with RRF fusion
searchRequest := bleve.NewSearchRequest(bleve.NewMatchQuery("dark chocolate"))
searchRequest.Score = bleve.ScoreRRF // Alternatively, set to "rrf"
// Add first kNN component
searchRequest.AddKNN(
"embedding", // Vector field
[]float32{0.1, 0.2, 0.3, 0.4}, // Query vector
30, // k neighbors
1.0, // kNN weight (boost)
)
// Add second kNN component (optional - you can add multiple)
searchRequest.AddKNN(
"image_embedding", // Different vector field
[]float32{0.5, 0.3, 0.1, 0.8}, // Query vector
20, // k neighbors
0.5, // kNN weight (boost)
)
// Optional: Configure RRF parameters
params := bleve.RequestParams{
ScoreRankConstant: 60, // Rank constant (default: 60)
ScoreWindowSize: 150, // Window size (default: size)
}
searchRequest.AddParams(params)
searchResult, err := index.Search(searchRequest)
```
### Relative Score Fusion (RSF)
Relative Score Fusion is a **score-based** strategy that normalizes scores from both modalities into a common [0, 1] range using min-max normalization before combining them.
**Algorithm:**
1. **Min-max normalize** each result set independently:
```math
\text{normalized\_score} = \frac{\text{score} - \text{min\_score}}{\text{max\_score} - \text{min\_score}}
```
2. **Combine** normalized scores using weighted addition:
```math
RSF\_score = w_{\text{fts}} \cdot \text{normalized\_score\_fts} + \sum_{i=1}^{n} w_{\text{knn}_i} \cdot \text{normalized\_score\_knn}_i
```
Where:
* $w_{\text{fts}}$: weight from the FTS query boost value
* $w_{\text{knn}_i}$: weight from the i-th kNN query boost value
* $\sum_{i=1}^{n}$: summation over all kNN queries (you can add multiple kNN queries)
**Advantages:**
* Score-aware - retains relevance magnitude information
* Resolves incompatible score ranges
* Easy to understand
**Disadvantages:**
* Sensitive to outliers - a single extreme score can skew normalization
* Doesn't account for the shape or distribution of scores
**Usage:**
```go
// Create a hybrid search with RSF fusion
searchRequest := bleve.NewSearchRequest(bleve.NewMatchQuery("machine learning"))
searchRequest.Score = bleve.ScoreRSF // Or set to "rsf"
// Add first kNN component
searchRequest.AddKNN(
"content_vector", // Vector field
[]float32{0.5, 0.3, 0.1, 0.8}, // Query vector
20, // k neighbors
1.0, // kNN weight (boost)
)
// Add second kNN component (optional - you can add multiple)
searchRequest.AddKNN(
"title_vector", // Different vector field
[]float32{0.2, 0.7, 0.4, 0.1}, // Query vector
15, // k neighbors
0.8, // kNN weight (boost)
)
// Optional: Configure RSF parameters
params := bleve.RequestParams{
ScoreWindowSize: 150, // Window size (default: size)
}
searchRequest.AddParams(params)
searchResult, err := index.Search(searchRequest)
```
## Parameters
### Score
The `Score` field in your search request specifies which fusion strategy to use:
* **`ScoreRRF ("rrf")`**: Reciprocal Rank Fusion
* **`ScoreRSF ("rsf")`**: Relative Score Fusion
* **Omitted or empty**: Default additive fusion with scores returned
### Params
The `Params` object contains additional parameters for score fusion:
#### Score Window Size
`ScoreWindowSize` is the maximum number of results to consider from each result list for fusion.
* **Default**: Same as `Size` parameter
* **Minimum**: Must be ≥ `Size` and ≥ 1
* **Purpose**: Controls the tradeoff between relevance and performance
A larger window size increases the chance of finding relevant results but requires more computation. For pagination to work consistently, ensure:
```text
From + Size <= ScoreWindowSize
```
**Example:**
```json
{
"score": "rrf",
"params": {
"score_window_size": 150
},
"size": 10,
"from": 0
}
```
With window size set to 150, you can paginate through up to 150 results. If you try to access results beyond this (e.g., `from=160`), you'll get an empty result set.
#### Score Rank Constant
> *Only applicable for RRF*
`ScoreRankConstant` controls how much the rank position affects the reciprocal rank score.
* **Default**: 60
* **Range**: Any positive integer
* **Effect**: Higher values dampen the impact of rank differences
**Example:**
```json
{
"score": "rrf",
"params": {
"score_rank_constant": 60
}
}
```
## Weighting Queries
The boost value in your query components controls their relative importance in hybrid search:
```go
// FTS query with boost 2.0
query := bleve.NewMatchQuery("search term")
query.SetBoost(2.0)
searchRequest := bleve.NewSearchRequest(query)
// kNN query with boost 1.0
searchRequest.AddKNN("vec", queryVector, 10, 1.0)
```
For RRF and RSF, weights determine the **relative importance** of each component's contribution, rather than scaling raw scores.
**Example:** If `fts_boost = 2.0` and `knn_boost = 1.0`, the FTS contribution is twice as important as the kNN contribution in the final ranking in RRF or RSF.
## Restrictions
When using score fusion (`Score` set to `"rrf"` or `"rsf"`), certain features are not supported:
* **SearchAfter/SearchBefore**: Not compatible with score fusion. For pagination, use `From` and `Size` only.
* **Sort**: Only descending score sort (`-_score`) or default sorting is allowed
* **Faceting**: Only documents included in the FTS result list are considered. Documents that appear exclusively in the KNN result list are ignored during faceting.
## Choosing a Fusion Strategy
| Use Case | Recommended Strategy |
|----------|---------------------|
| Different score scales (e.g., TF-IDF + L2 distance) | **RRF/RSF** |
| Minimal tuning, out-of-the-box performance | **RRF** |
| Want to preserve score magnitude importance | **RSF** |
| Have well-tuned boost values already | **Additive (default)** |
| Score distributions have extreme outliers | **RRF** |
================================================
FILE: docs/scoring.md
================================================
# Scoring models for document hits
* Search is performed on a collection of fields using compound queries such as conjunction/disjunction/boolean etc. However, the scoring itself is done independently for each field and then aggregated to get the final score for a document hit.
* Default scoring scheme for document hits involving text hits: `tf-idf`.
* Nearest-neighbor/vector hits scoring depends on chosen `knn distance` metric, highlighted [here](https://github.com/blevesearch/bleve/blob/master/docs/vectors.md#supported).
* Hybrid search scoring will combine `tf-idf` scores with `knn distance` numbers.
* *v2.5.0* (and after) will come with support for `bm25` scoring for exact searches.
## BM25
When it comes to scoring a document hit for a specific field, BM25 scoring mechanism requires the following stats:
* fieldLen - The number of analyzed terms in the current document's field.
* avgFieldLen - The average number of analyzed terms in the field across all the documents.
* docTotal - The total number of documents in the index.
* docTerm - The total number of documents containing the query term within the index.
The scoring formula followed in BM25 is
```math
\sum_{i}^n IDF(q_i) {{f(q_i,D) * (k1 + 1)}\over{f(q_i,D) + k1 * (1-b+b*{{fieldLen}\over{avgFieldLen}})}}
```
$IDF(q_i)$ here refers to the Inverse Document Frequency, which measures how rare (and hence how rich in information) a particular query term $q_i$ is across all the documents in the index. It is calculated as
```math
\ln(1 + {{docTotal - docTerm + 0.5}\over{docTerm + 0.5}})
```
Coming back to the BM25 scoring, $f(q_i,D)$ refers to the frequency of the query term in document $D$. The entire equation has certain multipliers
* $k1$ - helps in controlling the saturation of the score with respect to query term in a document. Basically if the query term's frequency is too high, the score value gets saturated and doesn't increase beyond a certain point.
* $b$ - controls the extent to which the $fieldLen$ normalizes the term's frequency.
### How to enable and use BM25
Bleve v2.5.0 updated the `indexMapping` construct with the concept of `scoringModel`. This is a global (meaning applicable to all the fields) setting which drives which scoring algorithm to apply while scoring the document hits. Supported scoring models are defined [here](https://github.com/blevesearch/bleve_index_api/blob/f54d76f0a71a838837159aa44ced0404bb6ec25f/indexing_options.go#L27)
For instance, while defining the index mapping for the data model that's been decided by the user, following snippet can be referred to enable BM25
```go
indexMapping := bleve.NewIndexMapping()
indexMapping.TypeField = "type"
indexMapping.DefaultAnalyzer = "en"
indexMapping.ScoringModel = "bm25"
```
During search time there's no explicit change involved, unless the user wants to perform global scoring.
### Global Scoring
Let's say that the user has a dataset which is quite large (let's say 3 million) and to have good throughput, they create 3 shards (with the same index mapping) for the "index". Each of these shards can be `bleve.Index` type and while performing a search over the entire dataset, a `bleve.IndexAlias` can be created over which a search can be performed. This parallelizes things quite well, both on the indexing path and the search path.
The concept of global scoring is applicable when the index is "sharded" (similar to the above situation). This is because each index has data which is disjoint, and thereby while performing the scoring on document hits on each of them, the value of stats is not complete at a global level, since we're doing a search over the entire dataset using the `bleve.IndexAlias`. For example, the `docTotal` value used while scoring the document hits on a single shard would be 1 million, which is incorrect at a global level.
So in order to keep the scoring roughly same across varying count of the number of shards involved, we provide a mechanism to enable "global scoring". In this type of search, an initial roundtrip is performed to gather and aggregate the stats necessary for the scoring mechanism and in the second phase, the actual search is performed. So naturally this comes at a cost of latency. As a reference here's how the user can go about with it
```go
multiPartIndex := bleve.NewIndexAlias(shard1, shard2)
// set the alias with the same index mapping which both the shards use.
err = multiPartIndex.SetIndexMapping(indexMapping)
if err != nil {
return err
}
ctx := context.Background()
ctx = context.WithValue(ctx, search.SearchTypeKey, search.GlobalScoring)
res, err := multiPartIndex.SearchInContext(ctx, searchRequest)
```
Note that this only matters if the relative order of the document hits varies quite a bit (versus the single-shard case). This can happen when the shard count increases quite a bit, in low doc count situations, or if there is a heavy skew in the data distribution amongst the shards for some reason. Ideally the shards are created when the data is quite large and each of them indexes the same amount of data - in which case the scores won't fluctuate enough to affect the relative hit order and the user can choose to avoid the global scoring mechanism altogether.
## TF-IDF
TF-IDF is the default scoring mechanism involved (for backward compatibility reasons) and requires no change from the user at index or search time to avail it.
The scoring formula involved is
```math
\sum_{i}^n f(q_i, D) * {{1}\over{\sqrt{fieldLen}}} * IDF(q_i)
```
where $IDF(q_i)$ is
```math
1 + \ln\left({{docTotal}\over{1 + docTerm}}\right)
```
Note: TF-IDF formula doesn't accommodate logic for score saturation due to term frequency or fieldLen. So, it's recommended to use BM25 scoring by explicitly setting it in the index mapping.
================================================
FILE: docs/search_autocomplete.md
================================================
# Edge N-gram Autocomplete in Bleve
Search autocomplete is a feature which we see in search boxes when suggestions appear while we type.
So when we type `jav`, we see suggestions like: `java` `javascript` `javascript programming` etc.
This is helpful because it saves users time in finding what they are looking for.

## 2. How Does It Work?
Autocomplete generally works in three steps:
1. **Index Time**: Breaking text into searchable pieces (tokens)
2. **Query Time**: Matching user input against indexed tokens
3. **Results**: Returning relevant suggestions quickly
But before we jump into the flow, let's understand different methods to achieve this and why edge n-grams are the most efficient approach.
## 3. Different Tokenization Methods
There are several tokenization approaches, each with its own strengths and weaknesses:
### 3.1 Single Token Tokenizer
```go
// analysis/tokenizer/single/single.go
func (t *SingleTokenTokenizer) Tokenize(input []byte) analysis.TokenStream {
return analysis.TokenStream{
&analysis.Token{
Term: input, // Here entire input as one token
Position: 1,
Start: 0,
End: len(input),
Type: analysis.AlphaNumeric,
},
}
}
```
**How it works**: Treats the entire input as a single token.
**Example**: "JavaScript Programming" → [`"JavaScript Programming"`]
**Pros**:
- Simple and fast
- Perfect for exact phrase matching
- Minimal index size
**Cons**:
- No autocomplete support (can't match partial text)
- Not flexible for search
**Use case**: Keyword fields, IDs, exact phrase matching
### 3.2 Whitespace Token Tokenizer
```go
// analysis/tokenizer/whitespace/whitespace.go
func TokenizerConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.Tokenizer, error) {
return character.NewCharacterTokenizer(notSpace), nil
}
func notSpace(r rune) bool {
return !unicode.IsSpace(r) // Split on whitespace
}
```
**How it works**: Splits text on whitespace characters.
**Example**: "JavaScript Programming" → [`"JavaScript"`, `"Programming"`]
**Pros**:
- Simple word-based tokenization
- Works well for basic prefix search
**Cons**:
- Only matches from word beginnings
- No support for partial word matching
- Limited autocomplete capabilities
**Use case**: Basic search, word-level indexing
### 3.3 N-gram Method
```go
// analysis/token/ngram/ngram.go
func (s *NgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
rv := make(analysis.TokenStream, 0, len(input))
for _, token := range input {
runeCount := utf8.RuneCount(token.Term)
runes := bytes.Runes(token.Term)
// ..generate all possible n-grams
}
return rv
}
```
**How it works**: Creates ALL possible substrings of specified lengths.
**Example**: "java" with 2-3 grams → [`"ja"`, `"av"`, `"va"`, `"jav"`, `"ava"`]
**Pros**:
- Supports partial matching anywhere in the text
- Coverage of all possible substrings
**Cons**:
- **MASSIVE index size** (exponential growth)
- Brings more noise as irrelevant matches (e.g., "av" matching "java")
- Poor performance for autocomplete
- High memory usage
**Use case**: Full-text substring search (not ideal for autocomplete)
### 3.4 Edge N-gram Method (preferred for autocomplete)
```go
// analysis/token/edgengram/edgengram.go
func (s *EdgeNgramFilter) Filter(input analysis.TokenStream) analysis.TokenStream {
rv := make(analysis.TokenStream, 0, len(input))
for _, token := range input {
runeCount := utf8.RuneCount(token.Term)
runes := bytes.Runes(token.Term)
// ..builds tokens based from either end, specified in the input
}
return rv
}
```
**How it works**: Creates substrings only from the beginning (or end) of each word.
**Example**: "javascript" with front edge n-grams (1-5) → [`"j"`, `"ja"`, `"jav"`, `"java"`, `"javas"`]
**Pros**:
- Perfect for autocomplete (matches prefixes naturally)
- Efficient index size (linear growth vs exponential)
- Fast queries (direct term matching, no complex processing)
- Intuitive results (matches what users expect)
- Highly scalable for large datasets
**Cons**:
- Only supports prefix matching (but that's preferred for autocomplete!)
- Slightly larger index than basic tokenization
**Use case**: Search autocomplete, prefix-based search
## 4. Why Edge N-grams Are Most Efficient for Autocomplete
Let's see what happens when a user types "java" with edge_ngram tokenizer:
```text
Index contains: ["j", "ja", "jav", "java", "javas", "javasc", "javascr", ...]
User types: "java"
Query: ExactTermQuery("java")
Process: Direct hash table lookup for term "java"
Result: Instant match, then retrieve documents containing this term
```
**Key advantages of Edge N-gram approach:**
1. **O(1) lookup**: Direct term matching
2. **No query-time processing**: Terms are pre-computed at index time
3. **Better caching**: Exact term queries cache better than prefix queries
4. **Consistent performance**: Query time doesn't increase with index size
## 5. Step-by-Step Implementation: Building Search Autocomplete
Now let's see how to implement this in practice using our exact configuration. We'll build a complete autocomplete system step by step.
### Step 1: Create Custom Edge N-gram Token Filter
First, we need to create a custom token filter for edge n-grams. Here's how it looks in our configuration:
```go
// Create a new index mapping
indexMapping := mapping.NewIndexMapping()
// 1. Define the edgeGram token filter
edgeGramFilter := map[string]interface{}{
"type": edgengram.Name,
"min": 2.0,
"max": 4.0,
"back": false,
}
// Register the token filter
if err := indexMapping.AddCustomTokenFilter("Engram", edgeGramFilter); err != nil {
log.Fatal(err)
}
```
**What each setting does:**
- `"type": "edge_ngram"` - Tells Bleve to use the edge n-gram filter
- `"min": 2` - Start creating tokens from 2 characters ("ja", "sc", etc.)
- `"max": 4` - Stop at 4 characters ("java", "scri", etc.)
- `"back": false` - Create tokens from the front (beginning) of words
### Step 2: Create Custom Analyzer
Next, we create an analyzer that uses our custom token filter along with other helpful filters:
```go
// 2. Define a custom analyzer that uses it
customAnalyzer := map[string]interface{}{
"type": custom.Name,
"tokenizer": "unicode",
"char_filters": []string{
zerowidthnonjoiner.Name,
},
"token_filters": []string{
"Engram", // our custom edge_ngram filter
"to_lower",
"stop_en",
},
}
if err := indexMapping.AddCustomAnalyzer("edgeGramAnalyzer", customAnalyzer); err != nil {
log.Fatal(err)
}
```
**The pipeline works like this:**
1. **Input Text**: "Schaumbergfest Event"
2. **Tokenizer** (`unicode`): Splits into words
```text
["Schaumbergfest", "Event"]
```
3. **Character Filter** (`zero_width_spaces`): Removes invisible characters (note: in the actual pipeline, character filters run on the raw input text before the tokenizer)
```text
["Schaumbergfest", "Event"] (cleaned)
```
4. **Token Filter 1** (`Engram`): Creates edge n-grams (2-4 chars)
```text
["Sc", "Sch", "Scha", "Ev", "Eve", "Even"]
```
5. **Token Filter 2** (`to_lower`): Makes everything lowercase
```text
["sc", "sch", "scha", "ev", "eve", "even"]
```
6. **Token Filter 3** (`stop_en`): Removes common words (none in this case)
```text
["sc", "sch", "scha", "ev", "eve", "even"] (final tokens)
```
### Step 3: Configure Field Mapping
Now we tell Bleve which fields to apply our autocomplete analyzer to:
```go
// 3. Assign analyzer to a field mapping
fieldMapping := mapping.NewTextFieldMapping()
fieldMapping.Analyzer = "edgeGramAnalyzer"
indexMapping.DefaultMapping.AddFieldMappingsAt("title", fieldMapping)
indexPath := "example.bleve"
index, err := bleve.New(indexPath, indexMapping)
if err != nil {
log.Fatal(err)
}
```
### Step 4: How It Works in Real Search
When someone searches for "sc", here's what happens:
**Index contains these tokens:**
```text
"sc" → [document1: "Schaumbergfest", document2: "Script", ...]
"sch" → [document1: "Schaumbergfest", ...]
"scha" → [document1: "Schaumbergfest", ...]
```
**User types "sc":**
1. Query: `title:sc`
2. Bleve looks up exact term "sc" in the index
3. Finds document with "Schaumbergfest" and "Script"
4. Returns suggestion instantly
```go
type Document struct {
ID string `json:"id"`
Title string `json:"title"`
}
// 4. Index Documents
documents := []Document{
{
ID: "doc1",
Title: "Schaumbergfest",
},
{
ID: "doc2",
Title: "Script",
},
}
batch := index.NewBatch()
for _, doc := range documents {
batch.Index(doc.ID, doc)
}
if err := index.Batch(batch); err != nil {
log.Fatal(err)
}
// 5. Search the created index
query := bleve.NewMatchQuery("sc")
query.SetField("title")
searchRequest := bleve.NewSearchRequest(query)
searchRequest.Explain = true
searchRequest.Fields = []string{"title"}
searchResult, err := index.Search(searchRequest)
if err != nil {
log.Fatal(err)
}
fmt.Println(searchResult)
```
Output:
```bash
$ go run main.go
2 matches, showing 1 through 2, took 189.041µs
1. doc2 (0.343255)
title
Script
2. doc1 (0.343255)
title
Schaumbergfest
```
Note: To run code, enclose code starting from Step 1 in func main.
================================================
FILE: docs/sort_facet.md
================================================
Purpose of Docvalues
Background
What are docValues? In the index mapping, there is an option to enable or disable docValues for a specific field mapping. However, what does it actually mean to activate or deactivate docValues, and how does it impact the end user? This document aims to address these questions.
"default_mapping": {
"dynamic": true,
"enabled": true,
"properties": {
"loremIpsum": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "loremIpsum",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": true
}
]
}
}
}
Enabling docValues will always result in an increase in the size of your Bleve index, leading to a corresponding increase in disk usage. But what advantages can you expect in return? This document also quantitatively assesses this trade-off with a test case.
In a more general sense, we recommend enabling docValues on a field mapping if you anticipate queries that involve sorting and/or facet operations on that field. It's important to note, though, that sorting and faceting will work irrespective of whether docValues are enabled or not. This may lead you to wonder if there's any real benefit to enabling docValues since you're allocating extra disk space without an apparent return. The real advantage, however, becomes evident in enhanced query response times and reduced memory consumption during active usage. By accepting a minor increase in the disk space used by your Full-Text Search (FTS) index, you can anticipate better performance in handling search requests that involve sorting and faceting.
Usage
The initial use of docValues comes into play when sorting is involved. In the search request JSON, there is a field named "sort." This optional "sort" field can have a slice of JSON objects as its value. Each JSON object must belong to one of the following types:
- SortDocID
- SortScore (which is the default if none is specified)
- SortGeoDistance
- SortField
DocValues are relevant only when any of the JSON objects in the "sort" field are of type SortGeoDistance or SortField. This means that if you expect queries on a field F, where the queries either do not specify a value for the "sort" field or provide a JSON object of type SortDocID or SortScore, enabling docValues will not improve sorting operations, and as a result, query latency will remain unchanged. It's worth noting that the default sorting object, SortScore, does not require docValues to be enabled for any of the field mappings. Therefore, a search request without a sorting operation will not utilize docValues at all.
| No Sort Objects |
SortDocID |
SortScore |
SortField |
SortGeoDistance |
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field":"dolor"
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field":"sit_amet"
},
"sort":[
{
"by":"id",
"desc":true
}
],
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field":"sit_amet"
},
"sort":[
{
"by":"score"
}
],
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field":"sit_amet"
},
"sort":[
{
"by":"field",
"field":"dolor",
"type":"auto",
"mode":"min",
"missing":"last"
}
],
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "dolor"
},
"sort": [
{
"by": "geo_distance",
"field": "sit_amet",
"location": [
123.223,
34.33
],
"unit": "km"
}
],
"size": 10,
"from": 0
}
|
| No DocValues used |
No DocValues used |
No DocValues used |
DocValues used for field "dolor". Field Mapping for "dolor" may enable docValues. |
DocValues used, for field "sit_amet".
Field Mapping for "sit_amet" may enable docValues. |
Now, let's consider faceting. The search request object also includes another field called "facets," where you can specify a collection of facet requests, with each request being associated with a unique name. Each of these facet requests can fall into one of three types:
- Date range
- Numeric range
- Term facet
Enabling docValues for the fields associated with such facet requests might provide benefits in this context.
| No Facet Request |
Date Range Facet |
Numeric Range Facet |
Term Facet |
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "dolor"
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "sit_amet"
},
"facets": {
"facetA": {
"size": 1,
"field": "dolor",
"date_ranges": [
{
"name": "lorem",
"start": "20/August/2001",
"end": "22/August/2002",
"datetime_parser": "custDT"
}
]
}
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "sit_amet"
},
"facets": {
"facetA": {
"size": 1,
"field": "dolor",
"numeric_ranges":[
{
"name":"lorem",
"min":22,
"max":34
}
]
}
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "sit_amet"
},
"facets": {
"facetA": {
"size": 1,
"field": "dolor"
}
},
"size": 10,
"from": 0
}
|
| No DocValues used |
DocValues used for field "dolor". Field Mapping for "dolor" may enable docValues. |
In summary, when a search request is received by the Bleve index, it extracts all the fields from the sort objects and facet objects. To potentially benefit from docValues, you should consider enabling docValues for the fields mentioned in SortField and SortGeoDistance sort objects, as well as the fields associated with all the facet objects. By doing so, you can optimize sorting and faceting operations in your search queries.
| Combo A |
Combo B |
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "sit_amet"
},
"facets": {
"facetA": {
"size": 1,
"field": "dolor",
"date_ranges": [
{
"name": "lorem",
"start": "20/August/2001",
"end": "22/August/2002",
"datetime_parser": "custDT"
}
]
}
},
"sort":[
{
"by":"field",
"field":"lorem",
"type":"auto",
"mode":"min",
"missing":"last"
}
],
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "lorem ipsum",
"field": "sit_amet"
},
"facets": {
"facetA": {
"size": 1,
"field": "dolor",
"numeric_ranges":[
{
"name":"lorem",
"min":22,
"max":34
}
]
}
},
"sort": [
{
"by": "geo_distance",
"field": "ipsum",
"location": [
123.223,
34.33
],
"unit": "km"
}
],
"size": 10,
"from": 0
}
|
| DocValues used for field "dolor" and "lorem". Field Mapping for "dolor" and "lorem" may enable docValues. |
DocValues used for field "dolor" and "ipsum". Field Mapping for "dolor" and "ipsum" may enable docValues. |
Empirical Analysis
To evaluate our hypothesis, I've set up a sample dataset on my personal computer and I've created two Bleve indexes: one with docvalues enabled for three fields (dummyDate, dummyNumber, and dummyTerm), and another where I've disabled docValues for the same three fields. These field mappings were incorporated into the Default Mapping. It's important to mention that for both indexes, DocValues for dynamic fields were enabled, as the default mapping is dynamic.
The values for dummyDate and dummyNumber were configured to increase monotonically, with dummyDate representing a date value and `dummyNumber` representing a numeric value. This setup was intentional to ensure that facet aggregation would consistently result in cache hits and misses, providing a useful testing scenario.
| Index A |
Index B |
"default_mapping": {
"dynamic": true,
"enabled": true,
"properties": {
"dummyNumber": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "dummyNumber",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": true
}
]
},
"dummyTerm": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "dummyTerm",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": true
}
]
},
"dummyDate": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "dummyDate",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": true
}
]
}
}
}
|
"default_mapping": {
"dynamic": true,
"enabled": true,
"properties": {
"dummyNumber": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "dummyNumber",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": false
}
]
},
"dummyTerm": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "dummyTerm",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": false
}
]
},
"dummyDate": {
"enabled": true,
"dynamic": false,
"fields": [
{
"name": "dummyDate",
"type": "text",
"store": false,
"index": true,
"include_term_vectors": false,
"include_in_all": false,
"docvalues": false
}
]
}
}
}
|
| Docvalues enabled across all three field mappings |
Docvalues disabled across all three field mappings |
Document Format used for the test scenario:
| Document 1 |
Document 2 |
... Document i |
Document 5000 |
{
"dummyTerm":"Term",
"dummyDate":"2000-01-01T00:00:00",
"dummyNumber":1
}
|
{
"dummyTerm":"Term",
"dummyDate":"2000-01-01T01:00:00",
"dummyNumber":2
}
|
{
"dummyTerm":"Term",
"dummyDate":"2000-01-01T01:00:00"+(i hours),
"dummyNumber":i
}
|
{
"dummyTerm":"Term",
"dummyDate":"2000-01-01T01:00:00" + (5000 hours),
"dummyNumber":5000
}
|
Now I ran the following set of search requests across both the indexes, while increasing the number of documents indexed from 2000 to 4000.
| Request 1 |
Request 2 |
... Request i |
Request 1000 |
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "term",
"field":"dummyTerm"
},
"facets":{
"myDate":{
"field":"dummyDate",
"size":100000,
"date_ranges":[
{
"start":"2000-01-01T00:00:00",
"end":"2000-01-01T01:00:00"
}
]
},
"myNum":{
"field":"dummyNumber",
"size":100000,
"numeric_ranges":[
{
"min": 1000,
"max": 1001
}
]
}
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "term",
"field":"dummyTerm"
},
"facets":{
"myDate":{
"field":"dummyDate",
"size":100000,
"date_ranges":[
{
"start":"2000-01-01T01:00:00",
"end":"2000-01-01T02:00:00"
}
]
},
"myNum":{
"field":"dummyNumber",
"size":100000,
"numeric_ranges":[
{
"min": 999,
"max": 1000
}
]
}
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "term",
"field":"dummyTerm"
},
"facets":{
"myDate":{
"field":"dummyDate",
"size":100000,
"date_ranges":[
{
"start":"2000-01-01T00:00:00" + i hour,
"end":"2000-01-01T00:00:00" + (i+1) hour
}
]
},
"myNum":{
"field":"dummyNumber",
"size":100000,
"numeric_ranges":[
{
"min": 1000-i,
"max": 1000-i+1
}
]
}
},
"size": 10,
"from": 0
}
|
{
"explain": true,
"fields": [
"*"
],
"highlight": {},
"query": {
"match": "term",
"field":"dummyTerm"
},
"facets":{
"myDate":{
"field":"dummyDate",
"size":100000,
"date_ranges":[
{
"start":"2000-01-01T01:00:00" + 1000 hour,
"end":"2000-01-01T02:00:00" + 1001 hour
}
]
},
"myNum":{
"field":"dummyNumber",
"size":100000,
"numeric_ranges":[
{
"min": 0,
"max": 1
}
]
}
},
"size": 10,
"from": 0
}
|
| Bleve index size growth with increase in indexed documents |
Total query time for 1000 queries with increase in number of indexed documents |
 |
 |
| Average increase in index size (in bytes) by enabling DocValues |
Average reduction in time taken to perform 1000 queries (in milliseconds) by enabling DocValues |
7762.47 |
27.034 |
Even at this small scale, with a small document size and a very limited number of indexed documents, we still observe a noticeable tradeoff. With just a slight increase in the index size (about 7.6KB on average), we obtain a reduction of roughly 27ms in the total execution time, on average, for only 1000 queries.
Technical Information
When a search request involves facet or sorting operations on a field F, these operations occur after the main search query is executed. For instance, if the main query yields a result of 200 documents, the sorting and faceting processes will be applied to these 200 documents. However, the main query result only provides a set of document IDs, not the actual document contents.
Here's where docValues become essential. If the field mapping for F is docValue enabled, the system can directly access the values for the field from the stored docValue part in the index file. This means that for each document ID returned in the search result, the field values are readily available.
However, if docValues are not enabled for field F, the system must take a different approach. It needs to "fetch the document" from the index file, read the value for field F, and cache this field-document pair in memory for further processing. The issue becomes apparent in the latter scenario. By not enabling docValues for field F, you essentially retrieve all the documents in the search result (at the worst case), which can be a substantial amount of data. Moreover, you have to cache this information in memory, leading to increased memory usage. As a result, query latency significantly suffers because you're essentially fetching and processing all documents, which can be both time-consuming and resource-intensive. Enabling docValues for the relevant fields is, therefore, a crucial optimization to enhance query performance and reduce memory overhead in such situations.
================================================
FILE: docs/synonyms.md
================================================
# Synonym search
* *v2.5.0* (and after) will come with support for **synonym definition indexing and search**.
* We've achieved this by embedding synonym indexes within our bleve (scorch) indexes.
* Usage of zap file format: [v16](https://github.com/blevesearch/zapx/blob/master/zap.md). Here we co-locate text, vector and synonym indexes as neighbors within segments, continuing to conform to the segmented architecture of *scorch*.
## Supported
* Indexing `Synonym Definitions` allows specifying equivalent terms that will be used to construct the synonym index. There are currently two types of `Synonym Definitions` supported:
1. Equivalent Mapping:
In this type, all terms in the *synonyms* list are considered equal and can replace one another. Any of these terms can match a query or document containing any other term in the group, ensuring full synonym coverage.
```json
{
"synonyms": [
"tranquil",
"peaceful",
"calm",
"relaxed",
"unruffled"
]
}
```
2. Explicit Mapping:
In this mapping, only the terms in the *input* list ("blazing") will have the terms in *synonyms* as their synonyms. The input terms are not equivalent to each other, and the synonym relationship is explicitly directional, applying only from the *input* to the *synonyms*.
```json
{
"input": [
"blazing"
],
"synonyms": [
"intense",
"radiant",
"burning",
"fiery",
"glowing"
]
}
```
* The addition of `Synonym Sources` in the index mapping enables associating a set of `synonym definitions` (called a `synonym collection`) with a specific analyzer. This allows for preprocessing of terms in both the *input* and *synonyms* lists before the synonym index is created. By using an analyzer, you can normalize or transform terms (e.g., case folding, stemming) to improve synonym matching.
```json
{
"analysis": {
"synonym_sources": {
"english": {
"collection": "en_thesaurus",
"analyzer": "en"
},
"german": {
"collection": "de_thesaurus",
"analyzer": "de"
}
}
}
}
```
There are two `synonym sources` named "english" and "german," each associated with its respective `synonym collection` and analyzer. In any text field mapping, a `synonym source` can be specified to enable synonym expansion when the field is queried. The analyzer of the synonym source must match the analyzer of the field mapping to which it is applied.
* Any text-based Bleve query (e.g., match, phrase, term, fuzzy, etc.) will use the `synonym source` (if available) for the queried field to expand the search terms using the thesaurus created from user-defined synonym definitions. The behavior for specific query types is as follows:
1. Queries with `fuzziness` parameter: For queries like match, phrase, and match-phrase that support the `fuzziness` parameter, the queried terms are fuzzily matched with the thesaurus's LHS terms to generate candidate terms. These terms are then combined with the results of fuzzy matching against the field dictionary, which contains the terms present in the queried field.
2. Wildcard, Regexp, and Prefix queries: These queries follow a similar approach. First, the thesaurus is used to expand terms (e.g., LHS terms that match the prefix or regex). The resulting terms are then combined with candidate terms from dictionary expansion.
## Indexing
Below is an example of using the Bleve API to define synonym sources, index synonym definitions, and associate them with a text field mapping:
```go
// Define a document to be indexed.
doc := struct {
Text string `json:"text"`
}{
Text: "hardworking employee",
}
// Define a synonym definition where "hardworking" has equivalent terms.
synDef := &bleve.SynonymDefinition{
Synonyms: []string{
"hardworking",
"industrious",
"conscientious",
"persistent",
},
}
// Define the name of the `synonym collection`.
// This collection groups multiple synonym definitions.
synonymCollection := "collection1"
// Define the name of the `synonym source`.
// This source will be associated with specific field mappings.
synonymSourceName := "english"
// Define the analyzer to process terms in the synonym definitions.
// This analyzer must match the one applied to the field using the synonym source.
analyzer := "en"
// Configure the synonym source by associating it with the synonym collection and analyzer.
synonymSourceConfig := map[string]interface{}{
"collection": synonymCollection,
"analyzer": analyzer,
}
// Create a new index mapping.
bleveMapping := bleve.NewIndexMapping()
// Add the synonym source configuration to the index mapping.
err := bleveMapping.AddSynonymSource(synonymSourceName, synonymSourceConfig)
if err != nil {
panic(err)
}
// Create a text field mapping with the specified analyzer and synonym source.
textFieldMapping := bleve.NewTextFieldMapping()
textFieldMapping.Analyzer = analyzer
textFieldMapping.SynonymSource = synonymSourceName
// Associate the text field mapping with the "text" field in the default document mapping.
bleveMapping.DefaultMapping.AddFieldMappingsAt("text", textFieldMapping)
// Create a new index with the specified mapping.
index, err := bleve.New("example.bleve", bleveMapping)
if err != nil {
panic(err)
}
// Index the document into the created index.
err = index.Index("doc1", doc)
if err != nil {
panic(err)
}
// Check if the index supports synonym indexing and add the synonym definition.
if synIndex, ok := index.(bleve.SynonymIndex); ok {
err = synIndex.IndexSynonym("synDoc1", synonymCollection, synDef)
if err != nil {
panic(err)
}
} else {
// If the index does not support synonym indexing, raise an error.
panic("expected synonym index")
}
```
## Querying
```go
// Query the index created above.
// Create a match query for the term "persistent".
query := bleve.NewMatchQuery("persistent")
// Specify the field to search within, in this case, the "text" field.
query.SetField("text")
// Create a search request with the query and enable explanation to understand how results are scored.
searchRequest := bleve.NewSearchRequest(query)
searchRequest.Explain = true
// Execute the search on the index.
searchResult, err := index.Search(searchRequest)
if err != nil {
// Handle any errors that occur during the search.
panic(err)
}
// The search result will contain one match: "doc1". This document includes the term "hardworking",
// which is a synonym for the queried term "persistent". The synonym relationship is based on
// the user-defined thesaurus associated with the index.
// Print the search results, which will include the explanation for the match.
fmt.Println(searchResult)
```
================================================
FILE: docs/vectors.md
================================================
# Nearest neighbor (vector) search
* *v2.4.0* (and after) will come with support for **vectors' indexing and search**.
* We've achieved this by embedding [FAISS](https://github.com/facebookresearch/faiss) indexes within our bleve (scorch) indexes.
* Introduction of a new zap file format: [v16](https://github.com/blevesearch/zapx/blob/master/zap.md) - which will be the default going forward. Here we co-locate text and vector indexes as neighbors within segments, continuing to conform to the segmented architecture of *scorch*.
## Pre-requisite(s)
* Introduction of [FAISS](https://github.com/blevesearch/faiss) into our ecosystem, which is a fork of the original [facebookresearch/faiss](https://github.com/facebookresearch/faiss)
* FAISS is a C++ library that needs to be compiled, and its shared libraries need to be located at a path accessible to your application.
* A `vectors` Go build tag needs to be set for bleve to access all the supporting code. This tag must be set only after the FAISS shared library is made available. Failure to do either will prevent you from using this feature.
* Please follow these [instructions](#setup-instructions) below for any assistance in the area.
* Releases of `blevesearch/bleve` work with select checkpoints of `blevesearch/faiss` owing to API changes and improvements (tracking over the `bleve` branch):
| bleve version(s) | blevesearch/faiss version |
| --- | --- |
| `v2.4.0` | [blevesearch/faiss@7b119f4](https://github.com/blevesearch/faiss/tree/7b119f4b9c408989b696b36f8cc53908e53de6db) (modified v1.7.4) |
| `v2.4.1`, `v2.4.2` | [blevesearch/faiss@d9db66a](https://github.com/blevesearch/faiss/tree/d9db66a38518d99eb334218697e1df0732f3fdf8) (modified v1.7.4) |
| `v2.4.3`, `v2.4.4` | [blevesearch/faiss@b747c55](https://github.com/blevesearch/faiss/tree/b747c55a93a9627039c34d44b081f375dca94e57) (modified v1.8.0) |
| `v2.5.0`, `v2.5.1` | [blevesearch/faiss@352484e](https://github.com/blevesearch/faiss/tree/352484e0fc9d1f8f46737841efe5f26e0f383f71) (modified v1.10.0) |
| `v2.5.2`, `v2.5.3`, `v2.5.4` | [blevesearch/faiss@b3d4e00](https://github.com/blevesearch/faiss/tree/b3d4e00a69425b95e0b283da7801efc9f66b580d) (modified v1.11.0) |
| `v2.5.5`, `v2.5.6`, `v2.5.7` | [blevesearch/faiss@8a59a0c](https://github.com/blevesearch/faiss/tree/8a59a0c552fa2d14fa871f6b6bc793de1d277f5e) (modified v1.12.0) |
| `v2.6.0` | [blevesearch/faiss@1f14a3e](https://github.com/blevesearch/faiss/tree/1f14a3e4ed5ec1efcb0a66055516980da5d0a453) (modified v1.13.2) |
## Supported
* The `vector` field type is an array that is to hold float32 values only.
* The `vector_base64` field type supports base64-encoded strings using little-endian byte ordering (v2.4.1+).
* Supported similarity metrics are: [`"cosine"` (v2.4.3+), `"dot_product"`, `"l2_norm"`].
* `cosine` paths will additionally normalize vectors before indexing and search.
* Supported dimensionality is between 1 and 2048 (v2.4.0), and up to **4096** (v2.4.1+).
* Supported vector index optimizations: `latency`, `memory_efficient` (v2.4.1+), `recall`.
* Vectors from documents that do not conform to the index mapping dimensionality are simply discarded at index time.
* The dimensionality of the query vector must match the dimensionality of the indexed vectors to obtain any results.
* Pure kNN searches can be performed, but the `query` attribute within the search request must be set - to `{"match_none": {}}` in this case. The `query` attribute is made optional when `knn` is available with v2.4.1+.
* Hybrid searches are supported, where results from `query` are unioned (for now) with results from `knn`. The tf-idf scores from exact searches are simply summed with the similarity distances to determine the aggregate scores.
```text
aggregate_score = (query_boost * query_hit_score) + (knn_boost * knn_hit_distance)
```
* Advanced score fusion strategies (v2.5.4+) are available when explicitly requested - see [score fusion](score_fusion.md#score-fusion-for-hybrid-search).
* Multi kNN searches are supported - the `knn` object within the search request accepts an array of requests. These sub objects are unioned by default but this behavior can be overridden by setting `knn_operator` to `"and"`.
* Previously supported pagination settings will work as they were, with size/limit being applied over the top-K hits combined with any exact search hits.
* Pre-filtered vector and hybrid search (v2.4.3+): Apply any Bleve filter query first to narrow down candidates before running kNN search, making vector and hybrid searches faster and more relevant.
* Fields containing multiple vectors (v2.5.7+):
* A single document may contain multiple vectors within the same field, in the form of either:
* an array of vectors (multi-vector field)
* an array of objects each containing a vector (nested-vector field)
* **All vectors in the field must share the same dimensionality**.
* For single-kNN queries, each document is scored using its single best-matching vector.
* For multi-kNN queries, the system selects the best-matching vector for each query vector within the document.
## Indexing
```go
// Example document with single-vector, multi-vector, and nested-vector fields
doc := struct {
Id string `json:"id"`
Text string `json:"text"`
Vec []float32 `json:"vec"` // Single-vector field
Embeddings [][]float32 `json:"embeddings"` // Multi-vector field: array of vectors (v2.5.7+)
Sections []struct { // Nested-vector field: array of objects with vectors (v2.5.7+)
Text string `json:"text"`
Vec []float32 `json:"vec"`
} `json:"sections"`
}{
Id: "example",
Text: "hello from united states",
Vec: []float32{0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, // Single-vector field of dimensionality 10
Embeddings: [][]float32{ // Multi-vector field containing 2 vectors of dimensionality 10
{10, 11, 12, 13, 14, 15, 16, 17, 18, 19}, // First vector
{20, 21, 22, 23, 24, 25, 26, 27, 28, 29}, // Second vector
},
Sections: []struct { // Nested-vector field containing 2 objects each with a vector of dimensionality 10
Text string `json:"text"`
Vec []float32 `json:"vec"`
}{
{Text: "first section", Vec: []float32{30, 31, 32, 33, 34, 35, 36, 37, 38, 39}},
{Text: "second section", Vec: []float32{40, 41, 42, 43, 44, 45, 46, 47, 48, 49}},
},
}
// Field mappings
textFieldMapping := bleve.NewTextFieldMapping()
vectorFieldMapping := bleve.NewVectorFieldMapping()
vectorFieldMapping.Dims = 10 // Set vector dimensionality
vectorFieldMapping.Similarity = "l2_norm" // Euclidean distance
// Sub-document mappings
sectionsMapping := bleve.NewDocumentMapping()
sectionsMapping.AddFieldMappingsAt("text", textFieldMapping)
sectionsMapping.AddFieldMappingsAt("vec", vectorFieldMapping)
// Index mapping
bleveMapping := bleve.NewIndexMapping()
bleveMapping.DefaultMapping.AddFieldMappingsAt("text", textFieldMapping)
bleveMapping.DefaultMapping.AddFieldMappingsAt("vec", vectorFieldMapping) // Single-vector
bleveMapping.DefaultMapping.AddFieldMappingsAt("embeddings", vectorFieldMapping) // Multi-vector
bleveMapping.DefaultMapping.AddSubDocumentMapping("sections", sectionsMapping) // Nested-vector
// Create the index
index, err := bleve.New("example.bleve", bleveMapping)
if err != nil {
panic(err)
}
// Index the document
err = index.Index(doc.Id, doc)
if err != nil {
panic(err)
}
```
## Querying
```go
// ------------------------------------
// Single-vector field search (v2.4.0+)
// ------------------------------------
searchRequest := bleve.NewSearchRequest(bleve.NewMatchNoneQuery())
searchRequest.AddKNN(
"vec", // Vector field
[]float32{0, 1, 1, 4, 4, 5, 7, 6, 8, 9}, // Query vector
5, // top-k
1, // boost
)
searchResult, err := index.Search(searchRequest)
if err != nil {
panic(err)
}
// Scores are 1 / squared L2 distance, e.g., score = 0.25 for squared distance of 4
fmt.Printf("Single-vector field kNN result:\n%s\n", searchResult)
```
```go
// -----------------------------------
// Multi-vector field search (v2.5.7+)
// -----------------------------------
searchRequest = bleve.NewSearchRequest(bleve.NewMatchNoneQuery())
searchRequest.AddKNN(
"embeddings",
[]float32{0, 1, 1, 4, 4, 5, 7, 6, 8, 9},
5,
1.0,
)
searchResult, err = index.Search(searchRequest)
if err != nil {
panic(err)
}
// Scores are based on the **best-matching vector** from the multi-vector field.
// Example: distances to doc vectors {10..19} and {20..29} → pick the closer one (smaller squared L2),
// then score = 1 / squared L2 distance.
fmt.Printf("Multi-vector field kNN result:\n%s\n", searchResult)
```
```go
// ------------------------------------
// Nested-vector field search (v2.5.7+)
// ------------------------------------
searchRequest = bleve.NewSearchRequest(bleve.NewMatchNoneQuery())
searchRequest.AddKNN(
"sections.vec",
[]float32{0, 1, 1, 4, 4, 5, 7, 6, 8, 9},
5,
1.0,
)
searchResult, err = index.Search(searchRequest)
if err != nil {
panic(err)
}
// Scores are based on the **best-matching vector** from the nested-vector field.
// Example: distances to doc vectors {30..39} and {40..49} → pick the closer one (smaller squared L2),
// then score = 1 / squared L2 distance.
fmt.Printf("Nested-vector field kNN result:\n%s\n", searchResult)
```
```go
// -----------------------------------------------------
// Multi kNN queries on multi-vector documents (v2.5.7+)
// -----------------------------------------------------
searchRequest = bleve.NewSearchRequest(bleve.NewMatchNoneQuery())
searchRequest.AddKNN(
"embeddings",
[]float32{0, 1, 1, 4, 4, 5, 7, 6, 8, 9},
5,
1.0,
)
searchRequest.AddKNN(
"embeddings",
[]float32{1, 2, 2, 5, 5, 6, 8, 7, 9, 10},
8,
1.0,
)
searchResult, err = index.Search(searchRequest)
if err != nil {
panic(err)
}
// Document score explanation:
// - For each query vector, Bleve selects the **closest vector** in the multi-vector field.
// - Scores from multiple queries are then **normalized and summed** to get the final document score.
// For example, if the closest vector to the first query has squared L2 distance 4 (score 0.25)
// and the closest vector to the second query has squared L2 distance 1 (score 1.0),
// and both queries use equal boost values of 1.0, the normalization factor is 1/√2 (where 2 is the number of kNN queries).
// Then the total document score = 1/√2 * 0.25 + 1/√2 * 1.0 = 0.1768 + 0.7071 = 0.8839.
// Note: If the boost values differ, or if more queries are used, the normalization factor and score calculation will change accordingly.
fmt.Printf("Multi kNN queries result:\n%s\n", searchResult)
```
```go
// --------------------------------------
// Hybrid search: text + vector (v2.4.0+)
// --------------------------------------
searchRequest = bleve.NewSearchRequest(bleve.NewMatchQuery("united states"))
searchRequest.AddKNN(
"vec",
[]float32{0, 1, 1, 4, 4, 5, 7, 6, 8, 9},
5,
1,
)
searchResult, err = index.Search(searchRequest)
if err != nil {
panic(err)
}
// Score = sum of text relevance score + kNN vector score
// Example: text score 0.5 + vector score 0.25 = total score 0.75
fmt.Printf("Hybrid search result:\n%s\n", searchResult)
```
## Querying with filters (v2.4.3+)
```go
// Pre-filtered vector/hybrid search: filter query narrows candidates before KNN search
searchRequest = bleve.NewSearchRequest(bleve.NewMatchNoneQuery()) // replace with any Bleve query for Pre-filtered Hybrid Search
filterQuery := bleve.NewTermQuery("hello") // Filter query to narrow candidates
searchRequest.AddKNNWithFilter(
"vec", // Vector field name
[]float32{0, 1, 1, 4, 4, 5, 7, 6, 8, 9}, // Query vector (must match indexed vector dims)
5, // Number of nearest neighbors to return (k)
1, // Boost factor for KNN score
filterQuery, // Filter query applied before KNN search
)
searchResult, err = index.Search(searchRequest)
if err != nil {
panic(err)
}
// Scores are computed only among documents matching the filter query
// Example: if only one document matches the filter and has squared L2 distance 4 to the query vector,
// its score will be 0.25 (1 / 4) and returned as the top result.
fmt.Printf("Pre-filtered kNN search result:\n%s\n", searchResult)
```
## Setup Instructions
* Using `cmake` is a recommended approach by FAISS authors.
* More details here - [faiss/INSTALL](https://github.com/blevesearch/faiss/blob/main/INSTALL.md).
### Linux
Also documented here - [go-faiss/README](https://github.com/blevesearch/go-faiss/blob/master/README.md).
```shell
git clone https://github.com/blevesearch/faiss.git
cd faiss
cmake -B build -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_C_API=ON -DBUILD_SHARED_LIBS=ON .
make -C build
sudo make -C build install
```
Building will produce the dynamic library `faiss_c`. You will need to install it in a place where your system will find it (e.g. `/usr/local/lib`). You can do this with:
```shell
sudo cp build/c_api/libfaiss_c.so /usr/local/lib
```
### OSX
While you shouldn't need to do anything differently on macOS x86_64, on aarch64 some instructions need adjusting (see [facebookresearch/faiss#2111](https://github.com/facebookresearch/faiss/issues/2111)):
```shell
LDFLAGS="-L/opt/homebrew/opt/llvm/lib" CPPFLAGS="-I/opt/homebrew/opt/llvm/include" CXX=/opt/homebrew/opt/llvm/bin/clang++ CC=/opt/homebrew/opt/llvm/bin/clang cmake -B build -DFAISS_ENABLE_GPU=OFF -DFAISS_ENABLE_C_API=ON -DBUILD_SHARED_LIBS=ON -DFAISS_ENABLE_PYTHON=OFF .
make -C build
sudo make -C build install
sudo cp build/c_api/libfaiss_c.dylib /usr/local/lib
```
### Sanity check
Once the supporting library is built and made available, a sanity run is recommended to make sure all unit tests, and especially those exercising the vectors code, pass. Here's how:
```shell
go test -ldflags "-r /usr/local/lib" ./... -tags=vectors
```
================================================
FILE: document/document.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeDocument caches the static size in bytes of a Document
// value as reported by reflect. Size uses it as its base figure.
var reflectStaticSizeDocument int

func init() {
	reflectStaticSizeDocument = int(reflect.TypeOf(Document{}).Size())
}
// Document is the in-memory representation of a document: an identifier plus
// the fields, composite fields and nested documents attached to it.
type Document struct {
	// id is the document identifier; read via ID and changed via SetID.
	id string
	// Fields holds every non-composite field added through AddField.
	Fields []Field `json:"fields"`
	// NestedDocuments holds the child documents appended by AddNestedDocument.
	NestedDocuments []*Document `json:"nested_documents"`
	// CompositeFields holds the *CompositeField values added through AddField.
	CompositeFields []*CompositeField
	// StoredFieldsSize is the value reported by StoredFieldsBytes.
	// NOTE(review): nothing in this file assigns it; presumably set by the
	// indexing code — confirm.
	StoredFieldsSize uint64
	// indexed is the flag set by SetIndexed and reported by Indexed.
	indexed bool
}
// StoredFieldsBytes returns the current value of StoredFieldsSize.
func (d *Document) StoredFieldsBytes() uint64 {
	return d.StoredFieldsSize
}
// NewDocument returns an empty Document carrying the given id. Both Fields
// and CompositeFields start out as empty, non-nil slices.
func NewDocument(id string) *Document {
	doc := new(Document)
	doc.id = id
	doc.Fields = []Field{}
	doc.CompositeFields = []*CompositeField{}
	return doc
}
// NewSynonymDocument returns an empty Document carrying the given id. Unlike
// NewDocument it only initialises Fields; CompositeFields is left nil.
func NewSynonymDocument(id string) *Document {
	doc := new(Document)
	doc.id = id
	doc.Fields = []Field{}
	return doc
}
// Size returns a size figure for the document in bytes: the static struct
// size, one pointer, the length of the id, and the Size of every field and
// composite field.
//
// NOTE(review): NestedDocuments are not included in this figure — confirm
// whether that is intentional.
func (d *Document) Size() int {
	total := reflectStaticSizeDocument + size.SizeOfPtr + len(d.id)

	for _, f := range d.Fields {
		total += f.Size()
	}
	for _, cf := range d.CompositeFields {
		total += cf.Size()
	}

	return total
}
// AddField appends f to the document and returns the document so calls can
// be chained. A *CompositeField goes to CompositeFields; every other field
// goes to Fields.
func (d *Document) AddField(f Field) *Document {
	if composite, ok := f.(*CompositeField); ok {
		d.CompositeFields = append(d.CompositeFields, composite)
		return d
	}
	d.Fields = append(d.Fields, f)
	return d
}
// GoString returns a description of the document listing its ID followed by
// the %#v rendering of each field and each composite field, comma separated.
func (d *Document) GoString() string {
	const sep = ", "

	var plain string
	for idx, f := range d.Fields {
		if idx > 0 {
			plain += sep
		}
		plain += fmt.Sprintf("%#v", f)
	}

	var composite string
	for idx, cf := range d.CompositeFields {
		if idx > 0 {
			composite += sep
		}
		composite += fmt.Sprintf("%#v", cf)
	}

	return fmt.Sprintf("&document.Document{ID:%s, Fields: %s, CompositeFields: %s}", d.ID(), plain, composite)
}
// NumPlainTextBytes returns the number of plain text bytes represented by the
// document. Every field is counted once on its own, and then once more for
// each composite field that includes it.
func (d *Document) NumPlainTextBytes() uint64 {
	var total uint64

	for _, f := range d.Fields {
		total += f.NumPlainTextBytes()
	}

	for _, cf := range d.CompositeFields {
		for _, f := range d.Fields {
			if !cf.includesField(f.Name()) {
				continue
			}
			total += f.NumPlainTextBytes()
		}
	}

	return total
}
// ID returns the document identifier.
func (d *Document) ID() string {
	return d.id
}
// SetID replaces the document identifier with id.
func (d *Document) SetID(id string) {
	d.id = id
}
// AddIDField adds a text field named "_id" whose value is the document ID,
// created with the IndexField and StoreField options.
func (d *Document) AddIDField() {
	idField := NewTextFieldCustom("_id", nil, []byte(d.ID()), index.IndexField|index.StoreField, nil)
	d.AddField(idField)
}
// VisitFields calls visitor once for each entry of Fields, in slice order.
func (d *Document) VisitFields(visitor index.FieldVisitor) {
	fields := d.Fields
	for i := 0; i < len(fields); i++ {
		visitor(fields[i])
	}
}
// VisitComposite calls visitor once for each entry of CompositeFields, in
// slice order.
func (d *Document) VisitComposite(visitor index.CompositeFieldVisitor) {
	composites := d.CompositeFields
	for i := 0; i < len(composites); i++ {
		visitor(composites[i])
	}
}
// HasComposite reports whether the document has at least one composite field.
func (d *Document) HasComposite() bool {
	return len(d.CompositeFields) > 0
}
// VisitSynonymFields calls visitor for each entry of Fields that implements
// index.SynonymField; all other fields are skipped.
func (d *Document) VisitSynonymFields(visitor index.SynonymFieldVisitor) {
	for _, f := range d.Fields {
		sf, isSynonym := f.(index.SynonymField)
		if !isSynonym {
			continue
		}
		visitor(sf)
	}
}
// SetIndexed sets the document's indexed flag to true.
func (d *Document) SetIndexed() {
	d.indexed = true
}
// Indexed reports whether SetIndexed has been called on the document.
func (d *Document) Indexed() bool {
	return d.indexed
}
// AddNestedDocument appends doc to the document's NestedDocuments.
func (d *Document) AddNestedDocument(doc *Document) {
	d.NestedDocuments = append(d.NestedDocuments, doc)
}
// VisitNestedDocuments calls visitor once for each entry of NestedDocuments,
// in slice order.
func (d *Document) VisitNestedDocuments(visitor func(doc index.Document)) {
	nested := d.NestedDocuments
	for i := 0; i < len(nested); i++ {
		visitor(nested[i])
	}
}
================================================
FILE: document/document_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"testing"
)
// TestDocumentNumPlainTextBytes checks Document.NumPlainTextBytes against a
// table of documents with a growing set of fields. Each document has a
// distinct ID so that a failure message identifies the failing case.
func TestDocumentNumPlainTextBytes(t *testing.T) {
	tests := []struct {
		doc *Document
		num uint64
	}{
		{
			// No fields at all.
			doc: NewDocument("a"),
			num: 0,
		},
		{
			doc: NewDocument("b").
				AddField(NewTextField("name", nil, []byte("hello"))),
			num: 5,
		},
		{
			doc: NewDocument("c").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))),
			num: 6,
		},
		{
			// The numeric field accounts for the remaining 8 bytes.
			doc: NewDocument("d").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))).
				AddField(NewNumericField("age", nil, 1.0)),
			num: 14,
		},
		{
			// A composite field covering every field counts them all a second time.
			doc: NewDocument("e").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))).
				AddField(NewNumericField("age", nil, 1.0)).
				AddField(NewCompositeField("_all", true, nil, nil)),
			num: 28,
		},
		{
			// With "age" listed in the last argument, only "name" and "desc"
			// are counted a second time (14 + 6).
			doc: NewDocument("f").
				AddField(NewTextField("name", nil, []byte("hello"))).
				AddField(NewTextField("desc", nil, []byte("x"))).
				AddField(NewNumericField("age", nil, 1.0)).
				AddField(NewCompositeField("_all", true, nil, []string{"age"})),
			num: 20,
		},
	}

	for _, test := range tests {
		actual := test.doc.NumPlainTextBytes()
		if actual != test.num {
			t.Errorf("expected doc '%s' to have %d plain text bytes, got %d", test.doc.ID(), test.num, actual)
		}
	}
}
================================================
FILE: document/field.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
index "github.com/blevesearch/bleve_index_api"
)
// Field is the interface implemented by every field type that can be added
// to a Document.
type Field interface {
	// Name returns the path of the field from the root DocumentMapping.
	// A root field path is "field", a subdocument field is "parent.field".
	Name() string
	// ArrayPositions returns the intermediate document and field indices
	// required to resolve the field value in the document. For example, if the
	// field path is "doc1.doc2.field" where doc1 and doc2 are slices or
	// arrays, ArrayPositions returns 2 indices used to resolve "doc2" value in
	// "doc1", then "field" in "doc2".
	ArrayPositions() []uint64
	// Options returns the indexing options of the field.
	Options() index.FieldIndexingOptions
	// Analyze runs the field's analysis step.
	// NOTE(review): BooleanField uses it to populate the values returned by
	// AnalyzedLength and AnalyzedTokenFrequencies; presumably the same holds
	// for the other field types — confirm.
	Analyze()
	// Value returns the field value as raw bytes.
	Value() []byte

	// NumPlainTextBytes should return the number of plain text bytes
	// that this field represents - this is a common metric for tracking
	// the rate of indexing
	NumPlainTextBytes() uint64

	// Size returns a size figure for the field, in bytes; Document.Size sums
	// it over all of its fields.
	Size() int

	// EncodedFieldType returns a single byte identifying the field type
	// (BooleanField, for instance, returns 'b').
	EncodedFieldType() byte
	// AnalyzedLength returns the length recorded for the field by analysis.
	AnalyzedLength() int
	// AnalyzedTokenFrequencies returns the token frequencies recorded for the
	// field by analysis.
	AnalyzedTokenFrequencies() index.TokenFrequencies
}
================================================
FILE: document/field_boolean.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeBooleanField caches the static size in bytes of a
// BooleanField value as reported by reflect. BooleanField.Size uses it as its
// base figure.
var reflectStaticSizeBooleanField int

func init() {
	reflectStaticSizeBooleanField = int(reflect.TypeOf(BooleanField{}).Size())
}
// DefaultBooleanIndexingOptions is the option set applied by NewBooleanField
// and NewBooleanFieldFromBytes: StoreField, IndexField and DocValues combined.
const DefaultBooleanIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// BooleanField is a Field holding a boolean value, encoded as a single byte
// ('T' or 'F') by the constructors that take a bool.
type BooleanField struct {
	// name is the value returned by Name.
	name string
	// arrayPositions is the value returned by ArrayPositions.
	arrayPositions []uint64
	// options is the value returned by Options.
	options index.FieldIndexingOptions
	// value is the encoded field value returned by Value.
	value []byte
	// numPlainTextBytes is the value returned by NumPlainTextBytes.
	numPlainTextBytes uint64
	// length is the token count recorded by Analyze.
	length int
	// frequencies holds the token frequencies computed by Analyze; it is nil
	// until Analyze has been called.
	frequencies index.TokenFrequencies
}
// Size returns a size figure for the field in bytes: the static struct size,
// one pointer, the lengths of the name and value, eight bytes per array
// position, and the size of the token frequencies when they are present.
func (b *BooleanField) Size() int {
	total := reflectStaticSizeBooleanField + size.SizeOfPtr +
		len(b.name) +
		len(b.arrayPositions)*size.SizeOfUint64 +
		len(b.value)

	// frequencies stays nil until Analyze has run.
	if b.frequencies != nil {
		total += b.frequencies.Size()
	}
	return total
}
// Name returns the name the field was created with.
func (b *BooleanField) Name() string {
	return b.name
}
// ArrayPositions returns the array positions the field was created with.
func (b *BooleanField) ArrayPositions() []uint64 {
	return b.arrayPositions
}
// Options returns the indexing options the field was created with.
func (b *BooleanField) Options() index.FieldIndexingOptions {
	return b.options
}
// Analyze turns the field value into a single Boolean token at position 1
// spanning the whole value, then records the token count and the token
// frequencies on the field.
func (b *BooleanField) Analyze() {
	token := &analysis.Token{
		Start:    0,
		End:      len(b.value),
		Term:     b.value,
		Position: 1,
		Type:     analysis.Boolean,
	}
	stream := analysis.TokenStream{token}

	b.length = len(stream)
	b.frequencies = analysis.TokenFrequency(stream, b.arrayPositions, b.options)
}
// Value returns the encoded field value.
func (b *BooleanField) Value() []byte {
	return b.value
}
// Boolean decodes the stored value. A one-byte value decodes to true when
// that byte is 'T' and to false otherwise; any other length is reported as
// an error.
func (b *BooleanField) Boolean() (bool, error) {
	if n := len(b.value); n != 1 {
		return false, fmt.Errorf("boolean field has %d bytes", n)
	}
	return b.value[0] == 'T', nil
}
// GoString returns a debug representation containing the field's name,
// options and encoded value.
func (b *BooleanField) GoString() string {
return fmt.Sprintf("&document.BooleanField{Name:%s, Options: %s, Value: %s}", b.name, b.options, b.value)
}
// NumPlainTextBytes returns the plain-text byte count recorded when the
// field was constructed.
func (b *BooleanField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}
// EncodedFieldType returns the single-byte type tag for boolean fields, 'b'.
func (b *BooleanField) EncodedFieldType() byte {
return 'b'
}
// AnalyzedLength returns the token count recorded by Analyze (zero before
// Analyze has been called).
func (b *BooleanField) AnalyzedLength() int {
return b.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze
// (nil before Analyze has been called).
func (b *BooleanField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return b.frequencies
}
// NewBooleanFieldFromBytes builds a BooleanField around an already-encoded
// value, using DefaultBooleanIndexingOptions. The value slice is stored
// as-is (not copied) and its length is recorded as the plain-text byte count.
func NewBooleanFieldFromBytes(name string, arrayPositions []uint64, value []byte) *BooleanField {
return &BooleanField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultBooleanIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewBooleanField builds a BooleanField for b using
// DefaultBooleanIndexingOptions.
func NewBooleanField(name string, arrayPositions []uint64, b bool) *BooleanField {
return NewBooleanFieldWithIndexingOptions(name, arrayPositions, b, DefaultBooleanIndexingOptions)
}
// NewBooleanFieldWithIndexingOptions builds a BooleanField for b with the
// given indexing options. True is encoded as "T" and false as "F".
func NewBooleanFieldWithIndexingOptions(name string, arrayPositions []uint64, b bool, options index.FieldIndexingOptions) *BooleanField {
	field := &BooleanField{
		name:           name,
		arrayPositions: arrayPositions,
		options:        options,
	}
	// The plain-text byte counts presumably correspond to the literals
	// "true" (4) and "false" (5).
	if b {
		field.value = []byte("T")
		field.numPlainTextBytes = 4
	} else {
		field.value = []byte("F")
		field.numPlainTextBytes = 5
	}
	return field
}
================================================
FILE: document/field_composite.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"reflect"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeCompositeField holds the static size in bytes of a
// CompositeField value as reported by reflection; it is computed once at
// package initialization and used by CompositeField.Size.
var reflectStaticSizeCompositeField int

func init() {
	reflectStaticSizeCompositeField = int(reflect.TypeOf(CompositeField{}).Size())
}
// DefaultCompositeIndexingOptions is index.IndexField; it is the option set
// used by NewCompositeField.
const DefaultCompositeIndexingOptions = index.IndexField
// CompositeField accumulates the analyzed length and token frequencies of
// other fields (see Compose). Which fields contribute is decided by the
// include/exclude sets and the defaultInclude flag.
type CompositeField struct {
name string
// includedFields and excludedFields are used as sets keyed by field name.
includedFields map[string]bool
excludedFields map[string]bool
// defaultInclude applies to fields named in neither set.
defaultInclude bool
options index.FieldIndexingOptions
// totalLength and compositeFrequencies are updated by Compose.
totalLength int
compositeFrequencies index.TokenFrequencies
}
// NewCompositeField builds a CompositeField using
// DefaultCompositeIndexingOptions.
func NewCompositeField(name string, defaultInclude bool, include []string, exclude []string) *CompositeField {
return NewCompositeFieldWithIndexingOptions(name, defaultInclude, include, exclude, DefaultCompositeIndexingOptions)
}
// NewCompositeFieldWithIndexingOptions builds a CompositeField with the given
// indexing options. The include and exclude names are turned into lookup
// sets, and the composite token frequencies start out empty (non-nil).
func NewCompositeFieldWithIndexingOptions(name string, defaultInclude bool, include []string, exclude []string, options index.FieldIndexingOptions) *CompositeField {
	included := make(map[string]bool, len(include))
	for _, fieldName := range include {
		included[fieldName] = true
	}

	excluded := make(map[string]bool, len(exclude))
	for _, fieldName := range exclude {
		excluded[fieldName] = true
	}

	return &CompositeField{
		name:                 name,
		options:              options,
		defaultInclude:       defaultInclude,
		includedFields:       included,
		excludedFields:       excluded,
		compositeFrequencies: make(index.TokenFrequencies),
	}
}
// Size returns an estimate, in bytes, of the memory held by this field: the
// static struct size plus a pointer, the name, one string header + key bytes
// + bool per entry of the include and exclude sets, and the composite token
// frequencies when present.
func (c *CompositeField) Size() int {
	total := reflectStaticSizeCompositeField + size.SizeOfPtr + len(c.name)

	for _, set := range []map[string]bool{c.includedFields, c.excludedFields} {
		for key := range set {
			total += size.SizeOfString + len(key) + size.SizeOfBool
		}
	}

	if c.compositeFrequencies != nil {
		total += c.compositeFrequencies.Size()
	}
	return total
}
// Name returns the name of the field.
func (c *CompositeField) Name() string {
return c.name
}
// ArrayPositions always returns an empty, non-nil slice for composite fields.
func (c *CompositeField) ArrayPositions() []uint64 {
return []uint64{}
}
// Options returns the indexing options of the field.
func (c *CompositeField) Options() index.FieldIndexingOptions {
return c.options
}
// Analyze is a no-op; a composite field's contents are accumulated through
// Compose instead.
func (c *CompositeField) Analyze() {
}
// Value always returns an empty, non-nil byte slice for composite fields.
func (c *CompositeField) Value() []byte {
return []byte{}
}
// NumPlainTextBytes always returns 0 for composite fields.
func (c *CompositeField) NumPlainTextBytes() uint64 {
return 0
}
// includesField reports whether the named field contributes to this
// composite. Exclusion takes precedence over inclusion, and a field named in
// neither set follows defaultInclude. Only key presence in the sets matters,
// not the stored bool.
func (c *CompositeField) includesField(field string) bool {
	if _, excluded := c.excludedFields[field]; excluded {
		return false
	}
	if _, included := c.includedFields[field]; included {
		return true
	}
	return c.defaultInclude
}
// Compose folds the analyzed length and token frequencies of the named field
// into this composite, provided includesField accepts that field; otherwise
// it does nothing.
func (c *CompositeField) Compose(field string, length int, freq index.TokenFrequencies) {
	if !c.includesField(field) {
		return
	}
	c.totalLength += length
	c.compositeFrequencies.MergeAll(field, freq)
}
// EncodedFieldType returns the single-byte type tag for composite fields, 'c'.
func (c *CompositeField) EncodedFieldType() byte {
return 'c'
}
// AnalyzedLength returns the sum of the lengths passed to Compose for
// included fields.
func (c *CompositeField) AnalyzedLength() int {
return c.totalLength
}
// AnalyzedTokenFrequencies returns the token frequencies merged so far by
// Compose.
func (c *CompositeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return c.compositeFrequencies
}
================================================
FILE: document/field_datetime.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"bytes"
"fmt"
"math"
"reflect"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// dateTimeValueSeperator is the single byte 0xff placed between the
// prefix-coded date and the layout string inside a DateTimeField value.
var dateTimeValueSeperator = []byte{'\xff'}
// reflectStaticSizeDateTimeField holds the static size in bytes of a
// DateTimeField value as reported by reflection; it is computed once at
// package initialization and used by DateTimeField.Size.
var reflectStaticSizeDateTimeField int

func init() {
	reflectStaticSizeDateTimeField = int(reflect.TypeOf(DateTimeField{}).Size())
}
// DefaultDateTimeIndexingOptions combines index.StoreField, index.IndexField
// and index.DocValues. It is the option set used by NewDateTimeField and
// NewDateTimeFieldFromBytes.
const DefaultDateTimeIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// DefaultDateTimePrecisionStep is the shift increment, in bits, between the
// successive prefix-coded terms emitted by DateTimeField.Analyze.
const DefaultDateTimePrecisionStep uint = 4
// MinTimeRepresentable and MaxTimeRepresentable bound the instants accepted
// by the DateTimeField constructors: the times whose nanosecond offsets from
// the Unix epoch are math.MinInt64 and math.MaxInt64 respectively.
var MinTimeRepresentable = time.Unix(0, math.MinInt64)
var MaxTimeRepresentable = time.Unix(0, math.MaxInt64)
// DateTimeField is a document field holding a point in time together with
// the layout string it was supplied with.
type DateTimeField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the prefix-coded Unix-nanosecond timestamp, optionally followed
// by dateTimeValueSeperator and the layout string (see splitValue).
value numeric.PrefixCoded
numPlainTextBytes uint64
// length and frequencies are populated by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size returns an estimate, in bytes, of the memory held by this field: the
// static struct size plus a pointer, the name, the array positions, the
// encoded value and, when Analyze has run, the token frequencies.
func (n *DateTimeField) Size() int {
	total := reflectStaticSizeDateTimeField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	total += len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the name of the field.
func (n *DateTimeField) Name() string {
return n.name
}
// ArrayPositions returns the array positions the field was created with.
func (n *DateTimeField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *DateTimeField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns the single-byte type tag for date-time fields, 'd'.
func (n *DateTimeField) EncodedFieldType() byte {
return 'd'
}
// AnalyzedLength returns the token count recorded by Analyze (zero before
// Analyze has been called).
func (n *DateTimeField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze
// (nil before Analyze has been called).
func (n *DateTimeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// splitValue separates the stored value into the prefix-coded date and the
// layout string, cutting at the first occurrence of dateTimeValueSeperator.
// When the separator is absent the whole value is returned as the date and
// the layout is empty.
func (n *DateTimeField) splitValue() (numeric.PrefixCoded, string) {
	sepAt := bytes.Index(n.value, dateTimeValueSeperator)
	if sepAt < 0 {
		return n.value, ""
	}
	// Cap the date slice at the separator so appends to it cannot reach the
	// layout bytes that follow.
	date := n.value[:sepAt:sepAt]
	layout := n.value[sepAt+len(dateTimeValueSeperator):]
	return date, string(layout)
}
// Analyze tokenizes the date portion of the value (the layout is ignored)
// and records the token count and token frequencies on the field.
//
// The first token is the full-precision prefix-coded date. If that date
// decodes to an int64, one further token is added per shift value
// DefaultDateTimePrecisionStep, 2*DefaultDateTimePrecisionStep, ... below 64,
// stopping early if a shifted encoding cannot be produced.
func (n *DateTimeField) Analyze() {
	datePart, _ := n.splitValue()

	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(datePart),
			Term:     datePart,
			Position: 1,
			Type:     analysis.DateTime,
		},
	}

	if decoded, err := datePart.Int64(); err == nil {
		for shift := DefaultDateTimePrecisionStep; shift < 64; shift += DefaultDateTimePrecisionStep {
			coded, err := numeric.NewPrefixCodedInt64(decoded, shift)
			if err != nil {
				break
			}
			stream = append(stream, &analysis.Token{
				Start:    0,
				End:      len(coded),
				Term:     coded,
				Position: 1,
				Type:     analysis.DateTime,
			})
		}
	}

	n.length = len(stream)
	n.frequencies = analysis.TokenFrequency(stream, n.arrayPositions, n.options)
}
// Value returns the raw stored bytes of the field, including the separator
// and layout when present. The returned slice is the field's own storage,
// not a copy.
func (n *DateTimeField) Value() []byte {
return n.value
}
// DateTime decodes the stored value into a UTC time and the layout string
// stored alongside it. If the date portion cannot be decoded, the zero time,
// an empty layout and the decoding error are returned.
func (n *DateTimeField) DateTime() (time.Time, string, error) {
	encoded, layout := n.splitValue()
	nanos, err := encoded.Int64()
	if err != nil {
		return time.Time{}, "", err
	}
	when := time.Unix(0, nanos).UTC()
	return when, layout, nil
}
// GoString returns a debug representation containing the field's name,
// options and raw stored value. The type is printed as
// document.DateTimeField so the output names the actual Go type.
func (n *DateTimeField) GoString() string {
	return fmt.Sprintf("&document.DateTimeField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain-text byte count recorded when the
// field was constructed.
func (n *DateTimeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewDateTimeFieldFromBytes builds a DateTimeField around an already-encoded
// value, using DefaultDateTimeIndexingOptions. The value slice is stored
// as-is (not copied) and its length is recorded as the plain-text byte count.
func NewDateTimeFieldFromBytes(name string, arrayPositions []uint64, value []byte) *DateTimeField {
return &DateTimeField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultDateTimeIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewDateTimeField builds a DateTimeField for dt and layout using
// DefaultDateTimeIndexingOptions. It returns an error when dt cannot be
// represented (see NewDateTimeFieldWithIndexingOptions).
func NewDateTimeField(name string, arrayPositions []uint64, dt time.Time, layout string) (*DateTimeField, error) {
return NewDateTimeFieldWithIndexingOptions(name, arrayPositions, dt, layout, DefaultDateTimeIndexingOptions)
}
// NewDateTimeFieldWithIndexingOptions builds a DateTimeField for dt with the
// given layout and indexing options. It returns an error when dt lies
// outside [MinTimeRepresentable, MaxTimeRepresentable].
func NewDateTimeFieldWithIndexingOptions(name string, arrayPositions []uint64, dt time.Time, layout string, options index.FieldIndexingOptions) (*DateTimeField, error) {
	if !canRepresent(dt) {
		return nil, fmt.Errorf("cannot represent %s in this type", dt)
	}

	prefixCoded := numeric.MustNewPrefixCodedInt64(dt.UnixNano(), 0)

	// The storage layer keeps a field's value as a plain byte slice with no
	// side channel for the layout, so the stored value is
	// prefixCoded + separator + layout. At query time the layout is split
	// back out of the stored bytes to format the decoded date correctly.
	stored := append(prefixCoded, dateTimeValueSeperator...)
	stored = append(stored, []byte(layout)...)

	field := &DateTimeField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          stored,
		options:        options,
		// not correct, just a place holder until we revisit how fields are
		// represented and can fix this better
		numPlainTextBytes: uint64(8),
	}
	return field, nil
}
// canRepresent reports whether dt lies within the inclusive range
// [MinTimeRepresentable, MaxTimeRepresentable].
func canRepresent(dt time.Time) bool {
	return !dt.Before(MinTimeRepresentable) && !dt.After(MaxTimeRepresentable)
}
================================================
FILE: document/field_geopoint.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeGeoPointField holds the static size in bytes of a
// GeoPointField value as reported by reflection; it is computed once at
// package initialization and used by GeoPointField.Size.
var reflectStaticSizeGeoPointField int

func init() {
	reflectStaticSizeGeoPointField = int(reflect.TypeOf(GeoPointField{}).Size())
}
// GeoPrecisionStep is the shift increment, in bits, between the successive
// prefix-coded terms emitted by GeoPointField.Analyze when no spatial
// analyzer plugin is set.
var GeoPrecisionStep uint = 9
// GeoPointField is a document field holding a single longitude/latitude point.
type GeoPointField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the prefix-coded morton hash of the point (see
// NewGeoPointFieldWithIndexingOptions, Lon and Lat).
value numeric.PrefixCoded
numPlainTextBytes uint64
// length and frequencies are populated by Analyze.
length int
frequencies index.TokenFrequencies
// spatialplugin, when non-nil, supplies the index tokens in Analyze; it is
// set through SetSpatialAnalyzerPlugin.
spatialplugin index.SpatialAnalyzerPlugin
}
// Size returns an estimate, in bytes, of the memory held by this field: the
// static struct size plus a pointer, the name, the array positions, the
// encoded value and, when Analyze has run, the token frequencies.
func (n *GeoPointField) Size() int {
	total := reflectStaticSizeGeoPointField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	total += len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the name of the field.
func (n *GeoPointField) Name() string {
return n.name
}
// ArrayPositions returns the array positions the field was created with.
func (n *GeoPointField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *GeoPointField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns the single-byte type tag for geo point fields, 'g'.
func (n *GeoPointField) EncodedFieldType() byte {
return 'g'
}
// AnalyzedLength returns the token count recorded by Analyze (zero before
// Analyze has been called).
func (n *GeoPointField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze
// (nil before Analyze has been called).
func (n *GeoPointField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// Analyze tokenizes the geo point and records the resulting token count and
// token frequencies on the field.
//
// The full-precision prefix-coded value is always emitted as the first
// token. If the value decodes to an int64, further tokens are added:
//   - with a spatial analyzer plugin set, the terms the plugin returns for
//     the decoded point, as analysis.AlphaNumeric tokens;
//   - otherwise, one prefix-coded analysis.Numeric term per shift value
//     GeoPrecisionStep, 2*GeoPrecisionStep, ... below 64, stopping early if a
//     shifted encoding cannot be produced.
//
// If the value cannot be decoded, only the first token is produced. In
// particular the plugin is not asked to tokenize a bogus (0, 0) point, which
// keeps both branches consistent.
func (n *GeoPointField) Analyze() {
	tokens := make(analysis.TokenStream, 0, 8)
	tokens = append(tokens, &analysis.Token{
		Start:    0,
		End:      len(n.value),
		Term:     n.value,
		Position: 1,
		Type:     analysis.Numeric,
	})

	// Decode the morton hash once; both branches below derive their tokens
	// from it.
	original, err := n.value.Int64()
	switch {
	case err != nil:
		// Undecodable value: nothing beyond the raw term can be derived.
	case n.spatialplugin != nil:
		// Same decoding as Lat() and Lon(), without decoding the value twice.
		morton := uint64(original)
		p := &geo.Point{
			Lat: geo.MortonUnhashLat(morton),
			Lon: geo.MortonUnhashLon(morton),
		}
		for _, term := range n.spatialplugin.GetIndexTokens(p) {
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(term),
				Term:     []byte(term),
				Position: 1,
				Type:     analysis.AlphaNumeric,
			})
		}
	default:
		for shift := GeoPrecisionStep; shift < 64; shift += GeoPrecisionStep {
			shiftEncoded, err := numeric.NewPrefixCodedInt64(original, shift)
			if err != nil {
				break
			}
			tokens = append(tokens, &analysis.Token{
				Start:    0,
				End:      len(shiftEncoded),
				Term:     shiftEncoded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	n.length = len(tokens)
	n.frequencies = analysis.TokenFrequency(tokens, n.arrayPositions, n.options)
}
// Value returns the prefix-coded bytes of the field. The returned slice is
// the field's own storage, not a copy.
func (n *GeoPointField) Value() []byte {
return n.value
}
// Lon decodes the stored value and returns the longitude extracted from its
// morton hash. A decoding failure is returned as the error, with 0 as the
// longitude.
func (n *GeoPointField) Lon() (float64, error) {
	decoded, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	morton := uint64(decoded)
	return geo.MortonUnhashLon(morton), nil
}
// Lat decodes the stored value and returns the latitude extracted from its
// morton hash. A decoding failure is returned as the error, with 0 as the
// latitude.
func (n *GeoPointField) Lat() (float64, error) {
	decoded, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	morton := uint64(decoded)
	return geo.MortonUnhashLat(morton), nil
}
// GoString returns a debug representation containing the field's name,
// options and encoded value.
func (n *GeoPointField) GoString() string {
return fmt.Sprintf("&document.GeoPointField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain-text byte count recorded when the
// field was constructed.
func (n *GeoPointField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewGeoPointFieldFromBytes builds a GeoPointField around an already-encoded
// value, using DefaultNumericIndexingOptions. The value slice is stored
// as-is (not copied) and its length is recorded as the plain-text byte count.
func NewGeoPointFieldFromBytes(name string, arrayPositions []uint64, value []byte) *GeoPointField {
return &GeoPointField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewGeoPointField builds a GeoPointField for the given longitude and
// latitude, starting from DefaultNumericIndexingOptions.
func NewGeoPointField(name string, arrayPositions []uint64, lon, lat float64) *GeoPointField {
return NewGeoPointFieldWithIndexingOptions(name, arrayPositions, lon, lat, DefaultNumericIndexingOptions)
}
// NewGeoPointFieldWithIndexingOptions builds a GeoPointField for the given
// longitude and latitude. The point is stored as the prefix-coded morton
// hash of (lon, lat). The supplied options are always extended with
// index.DocValues, index.SkipDVChunking and index.SkipDVCompression.
func NewGeoPointFieldWithIndexingOptions(name string, arrayPositions []uint64, lon, lat float64, options index.FieldIndexingOptions) *GeoPointField {
	encoded := numeric.MustNewPrefixCodedInt64(int64(geo.MortonHash(lon, lat)), 0)

	// docvalues are always enabled for geopoint fields, even if the
	// indexing options are set to not include docvalues.
	// snappy compression and chunking are always skipped for geopoint
	// to avoid mem copies and for faster lookups.
	options |= index.DocValues | index.SkipDVChunking | index.SkipDVCompression

	field := &GeoPointField{
		name:           name,
		arrayPositions: arrayPositions,
		value:          encoded,
		options:        options,
	}
	// not correct, just a place holder until we revisit how fields are
	// represented and can fix this better
	field.numPlainTextBytes = uint64(8)
	return field
}
// SetSpatialAnalyzerPlugin implements the
// index.TokenisableSpatialField interface.
// It stores the plugin on the field; a non-nil plugin makes Analyze take its
// index tokens from the plugin instead of generating shifted prefix-coded
// terms.
func (n *GeoPointField) SetSpatialAnalyzerPlugin(
plugin index.SpatialAnalyzerPlugin) {
n.spatialplugin = plugin
}
================================================
FILE: document/field_geopoint_test.go
================================================
package document
import "testing"
// TestGeoPointField checks that analyzing a geo point without a spatial
// plugin yields 8 tokens (the full-precision term plus one per
// GeoPrecisionStep shift below 64) and as many distinct token frequencies.
func TestGeoPointField(t *testing.T) {
	gf := NewGeoPointField("loc", []uint64{}, 0.0015, 0.0015)
	gf.Analyze()

	const wantTokens = 8
	if got := gf.AnalyzedLength(); got != wantTokens {
		t.Errorf("expected %d tokens, got %d", wantTokens, got)
	}
	// Report the observed count so a failure is diagnosable from the log.
	if got := len(gf.AnalyzedTokenFrequencies()); got != wantTokens {
		t.Errorf("expected %d token freqs, got %d", wantTokens, got)
	}
}
================================================
FILE: document/field_geoshape.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/geojson"
)
// reflectStaticSizeGeoShapeField holds the static size in bytes of a
// GeoShapeField value as reported by reflection; it is computed once at
// package initialization and used by GeoShapeField.Size.
var reflectStaticSizeGeoShapeField int

func init() {
	reflectStaticSizeGeoShapeField = int(reflect.TypeOf(GeoShapeField{}).Size())
}
// DefaultGeoShapeIndexingOptions combines index.IndexField and
// index.DocValues. It is the option set used by NewGeoShapeField and
// NewGeoShapeFieldFromBytes.
const DefaultGeoShapeIndexingOptions = index.IndexField | index.DocValues
// GeoShapeField is a document field holding a geo shape.
type GeoShapeField struct {
name string
// shape is the parsed shape handed to the spatial analyzer in Analyze. It
// is set by the shape-based constructors but not by
// NewGeoShapeFieldFromBytes.
shape index.GeoJSON
arrayPositions []uint64
options index.FieldIndexingOptions
numPlainTextBytes uint64
length int
// encodedValue is the shape encoding wrapped in geo.GlueBytes on both
// sides; it is returned by EncodedShape.
encodedValue []byte
// value is the byte form returned by Value and parsed by GeoShape.
value []byte
frequencies index.TokenFrequencies
}
// Size returns an estimate, in bytes, of the memory held by this field: the
// static struct size plus a pointer, the name, the array positions, the
// encoded shape, the value and, when Analyze has run, the token frequencies.
func (n *GeoShapeField) Size() int {
	total := reflectStaticSizeGeoShapeField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	total += len(n.encodedValue)
	total += len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the name of the field.
func (n *GeoShapeField) Name() string {
return n.name
}
// ArrayPositions returns the array positions the field was created with.
func (n *GeoShapeField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *GeoShapeField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns the single-byte type tag for geo shape fields, 's'.
func (n *GeoShapeField) EncodedFieldType() byte {
return 's'
}
// AnalyzedLength returns the token count recorded by Analyze (zero before
// Analyze has been called).
func (n *GeoShapeField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze
// (nil before Analyze has been called).
func (n *GeoShapeField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// Analyze asks the "s2" spatial analyzer plugin for the index terms of the
// field's shape, turns each term into an analysis.AlphaNumeric token at
// position 1, and records the token count and token frequencies on the field.
func (n *GeoShapeField) Analyze() {
	plugin := geo.GetSpatialAnalyzerPlugin("s2")
	terms := plugin.GetIndexTokens(n.shape)

	stream := make(analysis.TokenStream, 0, len(terms))
	for _, term := range terms {
		stream = append(stream, &analysis.Token{
			Start:    0,
			End:      len(term),
			Term:     []byte(term),
			Position: 1,
			Type:     analysis.AlphaNumeric,
		})
	}

	n.length = len(stream)
	n.frequencies = analysis.TokenFrequency(stream, n.arrayPositions, n.options)
}
// Value returns the byte form of the shape. The returned slice is the
// field's own storage, not a copy.
func (n *GeoShapeField) Value() []byte {
return n.value
}
// GoString returns a debug representation containing the field's name,
// options and value.
func (n *GeoShapeField) GoString() string {
return fmt.Sprintf("&document.GeoShapeField{Name:%s, Options: %s, Value: %s}",
n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain-text byte count recorded when the
// field was constructed.
func (n *GeoShapeField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// EncodedShape returns the glue-wrapped shape encoding; it is nil for fields
// built with NewGeoShapeFieldFromBytes.
func (n *GeoShapeField) EncodedShape() []byte {
return n.encodedValue
}
// NewGeoShapeField builds a GeoShapeField from coordinates and a shape type,
// starting from DefaultGeoShapeIndexingOptions. It returns nil when the
// shape cannot be built (see NewGeoShapeFieldFromShapeWithIndexingOptions).
func NewGeoShapeField(name string, arrayPositions []uint64,
coordinates [][][][]float64, typ string) *GeoShapeField {
return NewGeoShapeFieldWithIndexingOptions(name, arrayPositions,
coordinates, typ, DefaultGeoShapeIndexingOptions)
}
// NewGeoShapeFieldFromBytes builds a GeoShapeField around an existing value,
// using DefaultGeoShapeIndexingOptions. The value slice is stored as-is (not
// copied); the parsed shape and the encoded shape are left unset.
func NewGeoShapeFieldFromBytes(name string, arrayPositions []uint64,
value []byte) *GeoShapeField {
return &GeoShapeField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultGeoShapeIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewGeoShapeFieldWithIndexingOptions wraps the coordinates and shape type in
// a geojson.GeoShape and delegates to
// NewGeoShapeFieldFromShapeWithIndexingOptions. It returns nil when that
// constructor does.
func NewGeoShapeFieldWithIndexingOptions(name string, arrayPositions []uint64,
	coordinates [][][][]float64, typ string,
	options index.FieldIndexingOptions) *GeoShapeField {
	return NewGeoShapeFieldFromShapeWithIndexingOptions(
		name,
		arrayPositions,
		&geojson.GeoShape{Coordinates: coordinates, Type: typ},
		options,
	)
}
// NewGeoShapeFieldFromShapeWithIndexingOptions builds a GeoShapeField from a
// geojson.GeoShape. Circles (Type == geo.CircleType) are built from the
// shape's Center and Radius; every other type from its Coordinates.
//
// It returns nil when geoShape is nil, when the shape cannot be constructed,
// or when its byte value cannot be obtained. The supplied options are always
// extended with index.DocValues, index.SkipDVChunking and
// index.SkipDVCompression.
func NewGeoShapeFieldFromShapeWithIndexingOptions(name string, arrayPositions []uint64,
	geoShape *geojson.GeoShape, options index.FieldIndexingOptions) *GeoShapeField {
	if geoShape == nil {
		return nil
	}

	var shape index.GeoJSON
	var encodedValue []byte
	var err error

	if geoShape.Type == geo.CircleType {
		shape, encodedValue, err = geo.NewGeoCircleShape(geoShape.Center, geoShape.Radius)
	} else {
		shape, encodedValue, err = geo.NewGeoJsonShape(geoShape.Coordinates, geoShape.Type)
	}
	if err != nil {
		return nil
	}

	// Extra glue bytes on both sides keep the term splitting logic from
	// interfering with the custom encoding of the geoshape coordinates
	// inside the docvalues.
	//
	// The result is assembled in a freshly allocated buffer rather than by
	// appending onto the package-level geo.GlueBytes slice: appending to a
	// shared slice is only safe while that slice has no spare capacity, and
	// this form also needs a single allocation.
	glued := make([]byte, 0, len(encodedValue)+2*len(geo.GlueBytes))
	glued = append(glued, geo.GlueBytes...)
	glued = append(glued, encodedValue...)
	glued = append(glued, geo.GlueBytes...)

	// get the byte value for the geoshape.
	value, err := shape.Value()
	if err != nil {
		return nil
	}

	// docvalues are always enabled for geoshape fields, even if the
	// indexing options are set to not include docvalues.
	// snappy compression and chunking are always skipped for geoshape
	// to avoid mem copies and for faster lookups.
	options |= index.DocValues
	options |= index.SkipDVChunking
	options |= index.SkipDVCompression

	return &GeoShapeField{
		shape:             shape,
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		encodedValue:      glued,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}
// NewGeometryCollectionFieldWithIndexingOptions pairs each coordinates entry
// with the type at the same index, wraps every pair in a geojson.GeoShape,
// and delegates to NewGeometryCollectionFieldFromShapesWithIndexingOptions.
// It returns nil when coordinates and types differ in length.
func NewGeometryCollectionFieldWithIndexingOptions(name string,
	arrayPositions []uint64, coordinates [][][][][]float64, types []string,
	options index.FieldIndexingOptions) *GeoShapeField {
	if len(types) != len(coordinates) {
		return nil
	}

	shapes := make([]*geojson.GeoShape, 0, len(types))
	for i, coords := range coordinates {
		shapes = append(shapes, &geojson.GeoShape{
			Coordinates: coords,
			Type:        types[i],
		})
	}

	return NewGeometryCollectionFieldFromShapesWithIndexingOptions(name,
		arrayPositions, shapes, options)
}
// NewGeometryCollectionFieldFromShapesWithIndexingOptions builds a
// GeoShapeField holding a geometry collection made of the given shapes.
//
// It returns nil when the collection cannot be constructed or when its byte
// value cannot be obtained. The supplied options are always extended with
// index.DocValues, index.SkipDVChunking and index.SkipDVCompression.
func NewGeometryCollectionFieldFromShapesWithIndexingOptions(name string,
	arrayPositions []uint64, geoShapes []*geojson.GeoShape,
	options index.FieldIndexingOptions) *GeoShapeField {
	shape, encodedValue, err := geo.NewGeometryCollectionFromShapes(geoShapes)
	if err != nil {
		return nil
	}

	// Extra glue bytes on both sides keep the term splitting logic from
	// interfering with the custom encoding of the geoshape coordinates
	// inside the docvalues.
	//
	// The result is assembled in a freshly allocated buffer rather than by
	// appending onto the package-level geo.GlueBytes slice: appending to a
	// shared slice is only safe while that slice has no spare capacity, and
	// this form also needs a single allocation.
	glued := make([]byte, 0, len(encodedValue)+2*len(geo.GlueBytes))
	glued = append(glued, geo.GlueBytes...)
	glued = append(glued, encodedValue...)
	glued = append(glued, geo.GlueBytes...)

	// get the byte value for the geometryCollection.
	value, err := shape.Value()
	if err != nil {
		return nil
	}

	// docvalues are always enabled for geoshape fields, even if the
	// indexing options are set to not include docvalues.
	// snappy compression and chunking are always skipped for geoshape
	// to avoid mem copies and for faster lookups.
	options |= index.DocValues
	options |= index.SkipDVChunking
	options |= index.SkipDVCompression

	return &GeoShapeField{
		shape:             shape,
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		encodedValue:      glued,
		value:             value,
		numPlainTextBytes: uint64(len(value)),
	}
}
// NewGeoCircleFieldWithIndexingOptions wraps the center point and radius in a
// geojson.GeoShape of type geo.CircleType and delegates to
// NewGeoShapeFieldFromShapeWithIndexingOptions. It returns nil when that
// constructor does.
func NewGeoCircleFieldWithIndexingOptions(name string, arrayPositions []uint64,
	centerPoint []float64, radius string,
	options index.FieldIndexingOptions) *GeoShapeField {
	circle := geojson.GeoShape{
		Type:   geo.CircleType,
		Center: centerPoint,
		Radius: radius,
	}
	return NewGeoShapeFieldFromShapeWithIndexingOptions(name,
		arrayPositions, &circle, options)
}
// GeoShape is an implementation of the index.GeoShapeField interface.
// It parses the field's value with geojson.ParseGeoJSONShape on every call
// and returns the result; the shape held on the field is not consulted.
func (n *GeoShapeField) GeoShape() (index.GeoJSON, error) {
return geojson.ParseGeoJSONShape(n.value)
}
================================================
FILE: document/field_ip.go
================================================
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"net"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeIPField holds the static size in bytes of an IPField
// value as reported by reflection; it is computed once at package
// initialization and used by IPField.Size.
var reflectStaticSizeIPField int

func init() {
	reflectStaticSizeIPField = int(reflect.TypeOf(IPField{}).Size())
}
// DefaultIPIndexingOptions combines index.StoreField, index.IndexField and
// index.DocValues. It is the option set used by NewIPField and
// NewIPFieldFromBytes.
const DefaultIPIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// IPField is a document field holding a single IP address.
type IPField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the address; NewIPFieldWithIndexingOptions stores the result of
// To16 on the supplied address.
value net.IP
numPlainTextBytes uint64
// length and frequencies are populated by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size returns an estimate, in bytes, of the memory held by this field: the
// static struct size plus a pointer, the name, the array positions, the
// address bytes and, when Analyze has run, the token frequencies.
func (b *IPField) Size() int {
	total := reflectStaticSizeIPField + size.SizeOfPtr
	total += len(b.name)
	total += len(b.arrayPositions) * size.SizeOfUint64
	total += len(b.value)
	if b.frequencies != nil {
		total += b.frequencies.Size()
	}
	return total
}
// Name returns the name of the field.
func (b *IPField) Name() string {
return b.name
}
// ArrayPositions returns the array positions the field was created with.
func (b *IPField) ArrayPositions() []uint64 {
return b.arrayPositions
}
// Options returns the indexing options of the field.
func (b *IPField) Options() index.FieldIndexingOptions {
return b.options
}
// EncodedFieldType returns the single-byte type tag for IP fields, 'i'.
func (n *IPField) EncodedFieldType() byte {
return 'i'
}
// AnalyzedLength returns the token count recorded by Analyze (zero before
// Analyze has been called).
func (n *IPField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze
// (nil before Analyze has been called).
func (n *IPField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// Analyze emits the address bytes as a single analysis.IP token at position
// 1, sets the analyzed length to 1 and records the token frequencies on the
// field.
func (b *IPField) Analyze() {
	ipToken := &analysis.Token{
		Start:    0,
		End:      len(b.value),
		Term:     b.value,
		Position: 1,
		Type:     analysis.IP,
	}
	stream := analysis.TokenStream{ipToken}

	b.length = 1
	b.frequencies = analysis.TokenFrequency(stream, b.arrayPositions, b.options)
}
// Value returns the address bytes of the field. The returned slice is the
// field's own storage, not a copy.
func (b *IPField) Value() []byte {
return b.value
}
// IP returns the stored address as a net.IP. The error is always nil.
func (b *IPField) IP() (net.IP, error) {
return net.IP(b.value), nil
}
// GoString returns a debug representation containing the field's name,
// options and address.
func (b *IPField) GoString() string {
return fmt.Sprintf("&document.IPField{Name:%s, Options: %s, Value: %s}", b.name, b.options, net.IP(b.value))
}
// NumPlainTextBytes returns the plain-text byte count recorded when the
// field was constructed.
func (b *IPField) NumPlainTextBytes() uint64 {
return b.numPlainTextBytes
}
// NewIPFieldFromBytes builds an IPField around existing address bytes, using
// DefaultIPIndexingOptions. The value slice is stored as-is (not copied or
// normalized) and its length is recorded as the plain-text byte count.
func NewIPFieldFromBytes(name string, arrayPositions []uint64, value []byte) *IPField {
return &IPField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultIPIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewIPField builds an IPField for v using DefaultIPIndexingOptions.
func NewIPField(name string, arrayPositions []uint64, v net.IP) *IPField {
return NewIPFieldWithIndexingOptions(name, arrayPositions, v, DefaultIPIndexingOptions)
}
// NewIPFieldWithIndexingOptions builds an IPField for the address b with the
// given indexing options. The address is stored in the form returned by
// net.IP.To16, and the plain-text byte count is fixed at net.IPv6len.
func NewIPFieldWithIndexingOptions(name string, arrayPositions []uint64, b net.IP, options index.FieldIndexingOptions) *IPField {
	field := &IPField{
		name:              name,
		arrayPositions:    arrayPositions,
		options:           options,
		numPlainTextBytes: net.IPv6len,
	}
	field.value = b.To16()
	return field
}
================================================
FILE: document/field_ip_test.go
================================================
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"bytes"
"net"
"testing"
)
// TestIPField checks that an IPv4 address is stored in 16-byte form and that
// analysis yields exactly one token and one token frequency.
func TestIPField(t *testing.T) {
	nf := NewIPField("ip", []uint64{}, net.IPv4(192, 168, 1, 1))
	nf.Analyze()

	// Each failure reports the observed value so it is diagnosable from the
	// log alone.
	if nf.length != 1 {
		t.Errorf("expected 1 token, got %d", nf.length)
	}
	if len(nf.value) != 16 {
		t.Errorf("stored value should be in 16 byte ipv6 format, got %d bytes", len(nf.value))
	}
	want := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0xff, 0xff, 192, 168, 1, 1}
	if !bytes.Equal(nf.value, want) {
		t.Errorf("wrong value stored, expected 192.168.1.1, got %q", nf.value.String())
	}
	if got := len(nf.frequencies); got != 1 {
		t.Errorf("expected 1 token freqs, got %d", got)
	}
}
================================================
FILE: document/field_numeric.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeNumericField holds the static size in bytes of a
// NumericField value as reported by reflection; it is computed once at
// package initialization and used by NumericField.Size.
var reflectStaticSizeNumericField int

func init() {
	reflectStaticSizeNumericField = int(reflect.TypeOf(NumericField{}).Size())
}
// DefaultNumericIndexingOptions combines index.StoreField, index.IndexField
// and index.DocValues. It is the option set used by NewNumericField and
// NewNumericFieldFromBytes.
const DefaultNumericIndexingOptions = index.StoreField | index.IndexField | index.DocValues
// DefaultPrecisionStep is the shift increment, in bits, between the
// successive prefix-coded terms emitted by NumericField.Analyze.
const DefaultPrecisionStep uint = 4
// NumericField is a document field holding a single float64 number.
type NumericField struct {
name string
arrayPositions []uint64
options index.FieldIndexingOptions
// value is the prefix-coded int64 form of the number (see
// NewNumericFieldWithIndexingOptions and Number).
value numeric.PrefixCoded
numPlainTextBytes uint64
// length and frequencies are populated by Analyze.
length int
frequencies index.TokenFrequencies
}
// Size returns an estimate, in bytes, of the memory held by this field: the
// static struct size plus a pointer, the name, the array positions, the
// encoded value and, when Analyze has run, the token frequencies.
func (n *NumericField) Size() int {
	total := reflectStaticSizeNumericField + size.SizeOfPtr
	total += len(n.name)
	total += len(n.arrayPositions) * size.SizeOfUint64
	total += len(n.value)
	if n.frequencies != nil {
		total += n.frequencies.Size()
	}
	return total
}
// Name returns the name of the field.
func (n *NumericField) Name() string {
return n.name
}
// ArrayPositions returns the array positions the field was created with.
func (n *NumericField) ArrayPositions() []uint64 {
return n.arrayPositions
}
// Options returns the indexing options of the field.
func (n *NumericField) Options() index.FieldIndexingOptions {
return n.options
}
// EncodedFieldType returns the single-byte type tag for numeric fields, 'n'.
func (n *NumericField) EncodedFieldType() byte {
return 'n'
}
// AnalyzedLength returns the token count recorded by Analyze (zero before
// Analyze has been called).
func (n *NumericField) AnalyzedLength() int {
return n.length
}
// AnalyzedTokenFrequencies returns the token frequencies recorded by Analyze
// (nil before Analyze has been called).
func (n *NumericField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.frequencies
}
// Analyze tokenizes the number and records the token count and token
// frequencies on the field.
//
// The first token is the full-precision prefix-coded value. If that value
// decodes to an int64, one further token is added per shift value
// DefaultPrecisionStep, 2*DefaultPrecisionStep, ... below 64, stopping early
// if a shifted encoding cannot be produced.
func (n *NumericField) Analyze() {
	stream := analysis.TokenStream{
		&analysis.Token{
			Start:    0,
			End:      len(n.value),
			Term:     n.value,
			Position: 1,
			Type:     analysis.Numeric,
		},
	}

	if decoded, err := n.value.Int64(); err == nil {
		for shift := DefaultPrecisionStep; shift < 64; shift += DefaultPrecisionStep {
			coded, err := numeric.NewPrefixCodedInt64(decoded, shift)
			if err != nil {
				break
			}
			stream = append(stream, &analysis.Token{
				Start:    0,
				End:      len(coded),
				Term:     coded,
				Position: 1,
				Type:     analysis.Numeric,
			})
		}
	}

	n.length = len(stream)
	n.frequencies = analysis.TokenFrequency(stream, n.arrayPositions, n.options)
}
// Value returns the prefix-coded bytes of the field. The returned slice is
// the field's own storage, not a copy.
func (n *NumericField) Value() []byte {
return n.value
}
// Number decodes the stored value back into a float64. A decoding failure is
// returned as the error, with 0 as the number.
func (n *NumericField) Number() (float64, error) {
	decoded, err := n.value.Int64()
	if err != nil {
		return 0.0, err
	}
	number := numeric.Int64ToFloat64(decoded)
	return number, nil
}
// GoString returns a debug representation containing the field's name,
// options and encoded value.
func (n *NumericField) GoString() string {
return fmt.Sprintf("&document.NumericField{Name:%s, Options: %s, Value: %s}", n.name, n.options, n.value)
}
// NumPlainTextBytes returns the plain-text byte count recorded when the
// field was constructed.
func (n *NumericField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// NewNumericFieldFromBytes builds a NumericField around an already-encoded
// value, using DefaultNumericIndexingOptions. The value slice is stored
// as-is (not copied) and its length is recorded as the plain-text byte count.
func NewNumericFieldFromBytes(name string, arrayPositions []uint64, value []byte) *NumericField {
return &NumericField{
name: name,
arrayPositions: arrayPositions,
value: value,
options: DefaultNumericIndexingOptions,
numPlainTextBytes: uint64(len(value)),
}
}
// NewNumericField builds a NumericField for number using
// DefaultNumericIndexingOptions.
func NewNumericField(name string, arrayPositions []uint64, number float64) *NumericField {
return NewNumericFieldWithIndexingOptions(name, arrayPositions, number, DefaultNumericIndexingOptions)
}
// NewNumericFieldWithIndexingOptions creates a numeric field for number with
// the given indexing options. The number is converted to an int64 and stored
// prefix-coded with shift 0.
func NewNumericFieldWithIndexingOptions(name string, arrayPositions []uint64, number float64, options index.FieldIndexingOptions) *NumericField {
	encoded := numeric.MustNewPrefixCodedInt64(numeric.Float64ToInt64(number), 0)

	field := new(NumericField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.value = encoded
	field.options = options
	// not correct, just a place holder until we revisit how fields are
	// represented and can fix this better
	field.numPlainTextBytes = uint64(8)
	return field
}
================================================
FILE: document/field_numeric_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"testing"
)
// TestNumericField checks that analyzing a numeric field yields the expected
// number of tokens and an equal number of distinct token frequencies.
func TestNumericField(t *testing.T) {
	// One full-precision term plus one term per precision step below 64 bits
	// (presumably DefaultPrecisionStep is 4, giving 1 + 15 — confirm).
	const wantTokens = 16

	nf := NewNumericField("age", []uint64{}, 3.4)
	nf.Analyze()

	// Report the observed counts so a failure is diagnosable from the log.
	if got := nf.AnalyzedLength(); got != wantTokens {
		t.Errorf("expected %d tokens, got %d", wantTokens, got)
	}
	if got := len(nf.AnalyzedTokenFrequencies()); got != wantTokens {
		t.Errorf("expected %d token freqs, got %d", wantTokens, got)
	}
}
================================================
FILE: document/field_synonym.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeSynonymField caches the static size in bytes of a
// SynonymField struct value; it is filled in once at package initialization.
var reflectStaticSizeSynonymField int

func init() {
	reflectStaticSizeSynonymField = int(reflect.TypeOf(SynonymField{}).Size())
}
// DefaultSynonymIndexingOptions is the option set NewSynonymField assigns to
// new synonym fields.
const DefaultSynonymIndexingOptions = index.IndexField
// SynonymField carries one synonym definition: optional input terms, the
// synonyms themselves, and the analyzer applied to both during Analyze.
type SynonymField struct {
// name is the field name returned by Name.
name string
// analyzer is applied to every input term and synonym in Analyze.
analyzer analysis.Analyzer
// options are the indexing options returned by Options.
options index.FieldIndexingOptions
// input holds the raw terms that should map to synonyms; may be empty.
input []string
// synonyms holds the raw synonym terms.
synonyms []string
// numPlainTextBytes is returned by NumPlainTextBytes; NewSynonymField
// leaves it at zero.
numPlainTextBytes uint64
// populated during analysis
synonymMap map[string][]string
}
// Size returns an estimate of the field's memory footprint: the static struct
// size, one pointer, and the length of the name. The contents of input,
// synonyms and synonymMap are not included in this figure.
func (s *SynonymField) Size() int {
return reflectStaticSizeSynonymField + size.SizeOfPtr +
len(s.name)
}
// Name returns the name this synonym field was created with.
func (s *SynonymField) Name() string {
return s.name
}
// ArrayPositions always returns nil; synonym fields carry no array positions.
func (s *SynonymField) ArrayPositions() []uint64 {
return nil
}
// Options returns the indexing options configured for this field.
func (s *SynonymField) Options() index.FieldIndexingOptions {
return s.options
}
// NumPlainTextBytes returns the stored plain-text byte count. NewSynonymField
// does not set it, so fields built that way report zero.
func (s *SynonymField) NumPlainTextBytes() uint64 {
return s.numPlainTextBytes
}
// AnalyzedLength always returns 0; Analyze fills the synonym map rather than
// producing a token count.
func (s *SynonymField) AnalyzedLength() int {
return 0
}
// EncodedFieldType returns 'y', the type byte used for synonym fields.
func (s *SynonymField) EncodedFieldType() byte {
return 'y'
}
// AnalyzedTokenFrequencies always returns nil; synonym fields expose their
// analysis result through IterateSynonyms instead.
func (s *SynonymField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return nil
}
// Analyze runs the field's analyzer over every input term and synonym, drops
// the ones that do not analyze to exactly one token, and stores the resulting
// term-to-synonyms mapping on the field.
func (s *SynonymField) Analyze() {
	// analyzeAll returns the analyzed form of each term that survives analysis.
	analyzeAll := func(terms []string) []string {
		kept := make([]string, 0, len(terms))
		for _, raw := range terms {
			if analyzed := analyzeSynonymTerm(raw, s.analyzer); analyzed != "" {
				kept = append(kept, analyzed)
			}
		}
		return kept
	}

	// The input list stays nil when no input terms were supplied.
	var inputTerms []string
	if len(s.input) > 0 {
		inputTerms = analyzeAll(s.input)
	}
	synonymTerms := analyzeAll(s.synonyms)

	s.synonymMap = processSynonymData(inputTerms, synonymTerms)
}
// Value always returns nil; a synonym field has no byte value.
func (s *SynonymField) Value() []byte {
return nil
}
// IterateSynonyms calls visitor once for every term in the synonym map built
// by Analyze, passing the term and its synonyms. The map is ranged over
// directly, so the visiting order is unspecified; nothing is visited before
// Analyze has run.
func (s *SynonymField) IterateSynonyms(visitor func(term string, synonyms []string)) {
for term, synonyms := range s.synonymMap {
visitor(term, synonyms)
}
}
// NewSynonymField creates a synonym field with DefaultSynonymIndexingOptions.
// The input and synonyms slices are retained as given; the synonym map is not
// built until Analyze is called.
func NewSynonymField(name string, analyzer analysis.Analyzer, input []string, synonyms []string) *SynonymField {
	field := new(SynonymField)
	field.name = name
	field.analyzer = analyzer
	field.options = DefaultSynonymIndexingOptions
	field.input = input
	field.synonyms = synonyms
	return field
}
// processSynonymData builds the term-to-synonyms map.
//
// When input is non-empty, every input term maps to the same synonyms slice
// (shared, not copied). Otherwise every synonym maps to a fresh slice holding
// all the other synonyms in their original order. The returned map is never
// nil.
func processSynonymData(input []string, synonyms []string) map[string][]string {
	if len(input) > 0 {
		// Map each term to the same list of synonyms.
		byTerm := make(map[string][]string, len(input))
		for idx := range input {
			byTerm[input[idx]] = synonyms
		}
		return byTerm
	}

	// Precompute a map where each synonym points to all other synonyms.
	byTerm := make(map[string][]string, len(synonyms))
	for self, term := range synonyms {
		others := make([]string, 0, len(synonyms)-1)
		for pos, candidate := range synonyms {
			if pos == self {
				continue
			}
			others = append(others, candidate)
		}
		byTerm[term] = others
	}
	return byTerm
}
// analyzeSynonymTerm runs term through analyzer and returns the resulting
// token's text. Terms that analyze to zero tokens or to more than one token
// yield the empty string.
func analyzeSynonymTerm(term string, analyzer analysis.Analyzer) string {
	tokens := analyzer.Analyze([]byte(term))
	if len(tokens) != 1 {
		return ""
	}
	return string(tokens[0].Term)
}
================================================
FILE: document/field_text.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTextField caches the static size in bytes of a TextField
// struct value; it is filled in once at package initialization.
var reflectStaticSizeTextField int

func init() {
	reflectStaticSizeTextField = int(reflect.TypeOf(TextField{}).Size())
}
// DefaultTextIndexingOptions is the option set used by NewTextField and
// NewTextFieldWithAnalyzer.
const DefaultTextIndexingOptions = index.IndexField | index.DocValues
// TextField is a document field holding raw text bytes together with the
// analyzer (if any) used to tokenize them.
type TextField struct {
// name is the field name returned by Name.
name string
// arrayPositions is returned by ArrayPositions and passed to the token
// frequency computation in Analyze.
arrayPositions []uint64
// options are the indexing options returned by Options.
options index.FieldIndexingOptions
// analyzer tokenizes value in Analyze; when nil, the whole value becomes a
// single token.
analyzer analysis.Analyzer
// value is the raw field content.
value []byte
// numPlainTextBytes is set to len(value) by the constructors.
numPlainTextBytes uint64
// length is the token count recorded by Analyze.
length int
// frequencies holds the token frequencies recorded by Analyze.
frequencies index.TokenFrequencies
}
// Size returns an estimate of the field's memory footprint: the static struct
// size, one pointer, the name, the array positions, the value bytes and, once
// analyzed, the token frequencies.
func (t *TextField) Size() int {
	total := reflectStaticSizeTextField + size.SizeOfPtr
	total += len(t.name)
	total += len(t.arrayPositions) * size.SizeOfUint64
	total += len(t.value)
	if t.frequencies != nil {
		total += t.frequencies.Size()
	}
	return total
}
// Name returns the name this text field was created with.
func (t *TextField) Name() string {
return t.name
}
// ArrayPositions returns the array positions supplied at construction time.
// The slice is returned as-is, not copied.
func (t *TextField) ArrayPositions() []uint64 {
return t.arrayPositions
}
// Options returns the indexing options configured for this field.
func (t *TextField) Options() index.FieldIndexingOptions {
return t.options
}
// EncodedFieldType returns 't', the type byte used for text fields.
func (t *TextField) EncodedFieldType() byte {
return 't'
}
// AnalyzedLength returns the number of tokens recorded by the last call to
// Analyze; it is zero until Analyze has run.
func (t *TextField) AnalyzedLength() int {
return t.length
}
// AnalyzedTokenFrequencies returns the token frequencies computed by the last
// call to Analyze; it is nil until Analyze has run.
func (t *TextField) AnalyzedTokenFrequencies() index.TokenFrequencies {
return t.frequencies
}
// Analyze tokenizes the field value and records the token count and token
// frequencies on the field.
//
// Without an analyzer the entire value becomes a single alphanumeric token at
// position 1. With an analyzer, the value is handed to it; for stored fields a
// private copy is analyzed instead of the original bytes.
func (t *TextField) Analyze() {
	var tokens analysis.TokenStream

	if t.analyzer == nil {
		whole := &analysis.Token{
			Start:    0,
			End:      len(t.value),
			Term:     t.value,
			Position: 1,
			Type:     analysis.AlphaNumeric,
		}
		tokens = analysis.TokenStream{whole}
	} else {
		input := t.Value()
		if t.options.IsStored() {
			// need to copy (presumably so analysis cannot alter the bytes
			// that will be stored — confirm against the analyzers in use)
			private := make([]byte, len(input))
			copy(private, input)
			input = private
		}
		tokens = t.analyzer.Analyze(input)
	}

	t.length = len(tokens) // number of tokens in this doc field
	t.frequencies = analysis.TokenFrequency(tokens, t.arrayPositions, t.options)
}
// Analyzer returns the analyzer configured for this field, or nil if none was
// supplied.
func (t *TextField) Analyzer() analysis.Analyzer {
return t.analyzer
}
// Value returns the field's raw bytes. The slice is returned as-is, not
// copied.
func (t *TextField) Value() []byte {
return t.value
}
// Text returns the field's value converted to a string.
func (t *TextField) Text() string {
return string(t.value)
}
// GoString renders the field's name, options, analyzer, value and array
// positions in a Go-syntax-like form for debugging output.
func (t *TextField) GoString() string {
	const layout = "&document.TextField{Name:%s, Options: %s, Analyzer: %v, Value: %s, ArrayPositions: %v}"
	return fmt.Sprintf(layout, t.name, t.options, t.analyzer, t.value, t.arrayPositions)
}
// NumPlainTextBytes returns the byte count recorded at construction, which
// the constructors set to the length of the value.
func (t *TextField) NumPlainTextBytes() uint64 {
return t.numPlainTextBytes
}
// NewTextField creates a text field with DefaultTextIndexingOptions and no
// analyzer; it delegates to NewTextFieldWithIndexingOptions.
func NewTextField(name string, arrayPositions []uint64, value []byte) *TextField {
return NewTextFieldWithIndexingOptions(name, arrayPositions, value, DefaultTextIndexingOptions)
}
// NewTextFieldWithIndexingOptions creates a text field with the given
// indexing options and no analyzer.
func NewTextFieldWithIndexingOptions(name string, arrayPositions []uint64, value []byte, options index.FieldIndexingOptions) *TextField {
	// A nil analyzer makes Analyze index the value as one token.
	return NewTextFieldCustom(name, arrayPositions, value, options, nil)
}
// NewTextFieldWithAnalyzer creates a text field that uses the given analyzer
// together with DefaultTextIndexingOptions.
func NewTextFieldWithAnalyzer(name string, arrayPositions []uint64, value []byte, analyzer analysis.Analyzer) *TextField {
	return NewTextFieldCustom(name, arrayPositions, value, DefaultTextIndexingOptions, analyzer)
}
// NewTextFieldCustom creates a text field with explicit indexing options and
// analyzer. The value slice is retained as given and its length is recorded as
// the plain-text byte count.
func NewTextFieldCustom(name string, arrayPositions []uint64, value []byte, options index.FieldIndexingOptions, analyzer analysis.Analyzer) *TextField {
	field := new(TextField)
	field.name = name
	field.arrayPositions = arrayPositions
	field.options = options
	field.analyzer = analyzer
	field.value = value
	field.numPlainTextBytes = uint64(len(value))
	return field
}
================================================
FILE: document/field_vector.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package document
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeVectorField caches the static size in bytes of a
// VectorField struct value; it is filled in once at package initialization.
var reflectStaticSizeVectorField int

func init() {
	reflectStaticSizeVectorField = int(reflect.TypeOf(VectorField{}).Size())
}
// DefaultVectorIndexingOptions is the option set NewVectorField passes on when
// the caller does not supply indexing options.
const DefaultVectorIndexingOptions = index.IndexField
// VectorField is a document field holding a float32 vector along with the
// settings that describe how it should be indexed and scored.
type VectorField struct {
// name is the field name returned by Name.
name string
dims int // Dimensionality of the vector
similarity string // Similarity metric to use for scoring
// options are the indexing options returned by Options.
options index.FieldIndexingOptions
// value is the vector itself, returned by Vector.
value []float32
// numPlainTextBytes is set by the constructor to the byte size of value.
numPlainTextBytes uint64
vectorIndexOptimizedFor string // Optimization applied to this index.
}
// Size returns an estimate of the field's memory footprint: the static struct
// size, one pointer, the lengths of its string members and the byte size of
// the vector.
func (n *VectorField) Size() int {
	stringBytes := len(n.name) + len(n.similarity) + len(n.vectorIndexOptimizedFor)
	vectorBytes := int(numBytesFloat32s(n.value))
	return reflectStaticSizeVectorField + size.SizeOfPtr + stringBytes + vectorBytes
}
// Name returns the name this vector field was created with.
func (n *VectorField) Name() string {
return n.name
}
// ArrayPositions always returns nil; vector fields do not keep the array
// positions passed to their constructors.
func (n *VectorField) ArrayPositions() []uint64 {
return nil
}
// Options returns the indexing options in effect for this field, i.e. the
// caller's options after the adjustments made by the constructor.
func (n *VectorField) Options() index.FieldIndexingOptions {
return n.options
}
// NumPlainTextBytes returns the byte count recorded at construction, which is
// the byte size of the vector.
func (n *VectorField) NumPlainTextBytes() uint64 {
return n.numPlainTextBytes
}
// AnalyzedLength always returns 0.
func (n *VectorField) AnalyzedLength() int {
// vectors aren't analyzed
return 0
}
// EncodedFieldType returns 'v', the type byte used for vector fields.
func (n *VectorField) EncodedFieldType() byte {
return 'v'
}
// AnalyzedTokenFrequencies always returns nil.
func (n *VectorField) AnalyzedTokenFrequencies() index.TokenFrequencies {
// vectors aren't analyzed
return nil
}
// Analyze is a no-op for vector fields.
func (n *VectorField) Analyze() {
// vectors aren't analyzed
}
// Value always returns nil; the vector itself is available through Vector.
func (n *VectorField) Value() []byte {
return nil
}
// GoString renders the field's name, options and vector in a Go-syntax-like
// form for debugging output.
func (n *VectorField) GoString() string {
	const layout = "&document.VectorField{Name:%s, Options: %s, Value: %+v}"
	return fmt.Sprintf(layout, n.name, n.options, n.value)
}
// NewVectorField creates a vector field with DefaultVectorIndexingOptions by
// delegating to NewVectorFieldWithIndexingOptions.
//
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorField(name string, arrayPositions []uint64,
vector []float32, dims int, similarity, vectorIndexOptimizedFor string) *VectorField {
return NewVectorFieldWithIndexingOptions(name, arrayPositions,
vector, dims, similarity, vectorIndexOptimizedFor,
DefaultVectorIndexingOptions)
}
// NewVectorFieldWithIndexingOptions creates a vector field from the given
// vector, dimensionality, similarity metric and optimization setting.
//
// The supplied options are adjusted before use: the store, term-vector and
// doc-values flags are cleared and the skip-freq/norm flag is set. When the
// optimization requires a binary index, the similarity is overridden with
// cosine similarity.
//
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorFieldWithIndexingOptions(name string, arrayPositions []uint64,
	vector []float32, dims int, similarity, vectorIndexOptimizedFor string,
	options index.FieldIndexingOptions) *VectorField {
	// ensure the options are set to not store/index term vectors/doc values,
	// and skip freq/norms for vector field
	disallowed := index.StoreField | index.IncludeTermVectors | index.DocValues
	effective := (options &^ disallowed) | index.SkipFreqNorm

	// bivf-sq8 indexes only supports hamming distance for the primary
	// binary index. Similarity here is used for the backing flat index,
	// which is set to cosine similarity for recall reasons
	metric := similarity
	if index.OptimizationRequiresBinaryIndex(vectorIndexOptimizedFor) {
		metric = index.CosineSimilarity
	}

	field := new(VectorField)
	field.name = name
	field.dims = dims
	field.similarity = metric
	field.options = effective
	field.value = vector
	field.numPlainTextBytes = numBytesFloat32s(vector)
	field.vectorIndexOptimizedFor = vectorIndexOptimizedFor
	return field
}
// numBytesFloat32s returns the number of bytes occupied by the elements of
// value: its length multiplied by the size of one float32.
func numBytesFloat32s(value []float32) uint64 {
return uint64(len(value) * size.SizeOfFloat32)
}
// -----------------------------------------------------------------------------
// Following methods help in implementing the bleve_index_api's VectorField
// interface.
// Vector returns the field's vector. The slice is returned as-is, not copied.
func (n *VectorField) Vector() []float32 {
return n.value
}
// Dims returns the dimensionality the field was created with.
func (n *VectorField) Dims() int {
return n.dims
}
// Similarity returns the similarity metric in effect for this field, which
// may differ from the one requested if the constructor overrode it.
func (n *VectorField) Similarity() string {
return n.similarity
}
// IndexOptimizedFor returns the optimization setting the field was created
// with.
func (n *VectorField) IndexOptimizedFor() string {
return n.vectorIndexOptimizedFor
}
================================================
FILE: document/field_vector_base64.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package document
import (
"encoding/base64"
"encoding/binary"
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/size"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeVectorBase64Field caches the static size in bytes of a
// VectorBase64Field struct value; it is filled in once at package
// initialization.
var reflectStaticSizeVectorBase64Field int

func init() {
	reflectStaticSizeVectorBase64Field = int(reflect.TypeOf(VectorBase64Field{}).Size())
}
// VectorBase64Field wraps a VectorField whose vector was supplied as a
// base64-encoded string, keeping both the decoded field and the original
// encoding.
type VectorBase64Field struct {
// vectorField is the decoded vector field that most methods delegate to.
vectorField *VectorField
// base64Encoding is the original base64 string the vector was decoded from.
base64Encoding string
}
// Size returns an estimate of the field's memory footprint: the static struct
// size, one pointer, the length of the base64 string and, when present, the
// size of the wrapped vector field.
func (n *VectorBase64Field) Size() int {
	total := reflectStaticSizeVectorBase64Field + size.SizeOfPtr + len(n.base64Encoding)
	if n.vectorField != nil {
		total += n.vectorField.Size()
	}
	return total
}
// Name returns the name of the wrapped vector field.
func (n *VectorBase64Field) Name() string {
return n.vectorField.Name()
}
// ArrayPositions returns the array positions reported by the wrapped vector
// field.
func (n *VectorBase64Field) ArrayPositions() []uint64 {
return n.vectorField.ArrayPositions()
}
// Options returns the indexing options of the wrapped vector field.
func (n *VectorBase64Field) Options() index.FieldIndexingOptions {
return n.vectorField.Options()
}
// NumPlainTextBytes returns the byte count reported by the wrapped vector
// field.
func (n *VectorBase64Field) NumPlainTextBytes() uint64 {
return n.vectorField.NumPlainTextBytes()
}
// AnalyzedLength returns the analyzed length reported by the wrapped vector
// field.
func (n *VectorBase64Field) AnalyzedLength() int {
return n.vectorField.AnalyzedLength()
}
// EncodedFieldType returns 'e', the type byte used for base64 vector fields.
func (n *VectorBase64Field) EncodedFieldType() byte {
return 'e'
}
// AnalyzedTokenFrequencies returns the token frequencies reported by the
// wrapped vector field.
func (n *VectorBase64Field) AnalyzedTokenFrequencies() index.TokenFrequencies {
return n.vectorField.AnalyzedTokenFrequencies()
}
// Analyze is a no-op for base64 vector fields.
func (n *VectorBase64Field) Analyze() {
}
// Value returns the byte value reported by the wrapped vector field.
func (n *VectorBase64Field) Value() []byte {
return n.vectorField.Value()
}
// GoString renders the field's name, options and decoded vector in a
// Go-syntax-like form for debugging output.
//
// The type is reported under its real name, and the vector is taken from
// Vector() rather than Value(): the wrapped field's Value() is always nil, so
// printing it would never show the field's contents.
func (n *VectorBase64Field) GoString() string {
	return fmt.Sprintf("&document.VectorBase64Field{Name:%s, Options: %s, "+
		"Value: %+v}", n.vectorField.Name(), n.vectorField.Options(), n.vectorField.Vector())
}
// NewVectorBase64Field decodes vectorBase64 into a float32 vector and wraps
// it, together with the original encoding, in a VectorBase64Field that uses
// DefaultVectorIndexingOptions. It returns the decoding error, and a nil
// field, when the string cannot be decoded.
//
// For the sake of not polluting the API, we are keeping arrayPositions as a
// parameter, but it is not used.
func NewVectorBase64Field(name string, arrayPositions []uint64, vectorBase64 string,
	dims int, similarity, vectorIndexOptimizedFor string) (*VectorBase64Field, error) {
	vec, err := DecodeVector(vectorBase64)
	if err != nil {
		return nil, err
	}

	inner := NewVectorFieldWithIndexingOptions(name, arrayPositions,
		vec, dims, similarity,
		vectorIndexOptimizedFor, DefaultVectorIndexingOptions)

	field := &VectorBase64Field{
		vectorField:    inner,
		base64Encoding: vectorBase64,
	}
	return field, nil
}
// DecodeVector decodes a standard-base64 string into a float32 vector.
//
// The decoded bytes are read as consecutive little-endian float32 values. An
// error is returned when the string is not valid base64, when the byte count
// is not a multiple of the float32 size, when no values are present, or when
// any value fails the util.IsValidFloat32 check.
func DecodeVector(encodedValue string) ([]float32, error) {
	// We first decode the encoded string into a byte array.
	raw, err := base64.StdEncoding.DecodeString(encodedValue)
	if err != nil {
		return nil, err
	}

	// The array is expected to be divisible by 4 because each float32
	// should occupy 4 bytes
	width := size.SizeOfFloat32
	if len(raw)%width != 0 {
		return nil, fmt.Errorf("decoded byte array not divisible by %d", size.SizeOfFloat32)
	}

	count := len(raw) / width
	if count <= 0 {
		return nil, fmt.Errorf("unable to decode encoded vector")
	}

	// We walk the array one float32-sized chunk at a time and convert each
	// chunk to a float32 value by reading it in little endian notation
	vec := make([]float32, count)
	for i := range vec {
		chunk := raw[i*width : (i+1)*width]
		val := math.Float32frombits(binary.LittleEndian.Uint32(chunk))
		if !util.IsValidFloat32(float64(val)) {
			return nil, fmt.Errorf("invalid float32 value: %f", val)
		}
		vec[i] = val
	}
	return vec, nil
}
// Vector returns the decoded vector held by the wrapped vector field.
func (n *VectorBase64Field) Vector() []float32 {
return n.vectorField.Vector()
}
// Dims returns the dimensionality of the wrapped vector field.
func (n *VectorBase64Field) Dims() int {
return n.vectorField.Dims()
}
// Similarity returns the similarity metric of the wrapped vector field.
func (n *VectorBase64Field) Similarity() string {
return n.vectorField.Similarity()
}
// IndexOptimizedFor returns the optimization setting of the wrapped vector
// field.
func (n *VectorBase64Field) IndexOptimizedFor() string {
return n.vectorField.IndexOptimizedFor()
}
================================================
FILE: document/field_vector_base64_test.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package document
import (
"bytes"
"encoding/base64"
"encoding/binary"
"fmt"
"math/rand"
"testing"
)
// TestDecodeVector round-trips a random 2048-dimensional vector through
// little-endian byte encoding, base64 and DecodeVector, and checks that the
// decoded vector matches the original element for element.
func TestDecodeVector(t *testing.T) {
	vec := make([]float32, 2048)
	for i := range vec {
		vec[i] = rand.Float32()
	}

	vecBytes := bytifyVec(vec)
	encodedVec := base64.StdEncoding.EncodeToString(vecBytes)

	// Abort on a decode error or a length mismatch: continuing would index
	// past the end of decodedVector in the comparison loop below and turn a
	// clean failure into a panic.
	decodedVector, err := DecodeVector(encodedVec)
	if err != nil {
		t.Fatal(err)
	}
	if len(decodedVector) != len(vec) {
		t.Fatalf("Decoded vector dimensions not same as original vector dimensions: %d != %d",
			len(decodedVector), len(vec))
	}

	for i := range vec {
		if vec[i] != decodedVector[i] {
			t.Fatalf("Decoded vector not the same as original vector %v != %v", vec[i], decodedVector[i])
		}
	}
}
// BenchmarkDecodeVector128 measures DecodeVector on a base64-encoded random
// vector with 128 dimensions. Encoding happens before the timer is reset.
func BenchmarkDecodeVector128(b *testing.B) {
	const dims = 128

	input := make([]float32, dims)
	for idx := 0; idx < dims; idx++ {
		input[idx] = rand.Float32()
	}
	encoded := base64.StdEncoding.EncodeToString(bytifyVec(input))

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		// result and error are deliberately discarded; only the call is timed
		_, _ = DecodeVector(encoded)
	}
}
// BenchmarkDecodeVector784 measures DecodeVector on a base64-encoded random
// vector with 784 dimensions. Encoding happens before the timer is reset.
func BenchmarkDecodeVector784(b *testing.B) {
	const dims = 784

	input := make([]float32, dims)
	for idx := 0; idx < dims; idx++ {
		input[idx] = rand.Float32()
	}
	encoded := base64.StdEncoding.EncodeToString(bytifyVec(input))

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		// result and error are deliberately discarded; only the call is timed
		_, _ = DecodeVector(encoded)
	}
}
// BenchmarkDecodeVector1536 measures DecodeVector on a base64-encoded random
// vector with 1536 dimensions. Encoding happens before the timer is reset.
func BenchmarkDecodeVector1536(b *testing.B) {
	const dims = 1536

	input := make([]float32, dims)
	for idx := 0; idx < dims; idx++ {
		input[idx] = rand.Float32()
	}
	encoded := base64.StdEncoding.EncodeToString(bytifyVec(input))

	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		// result and error are deliberately discarded; only the call is timed
		_, _ = DecodeVector(encoded)
	}
}
// bytifyVec serializes vec as consecutive little-endian float32 values and
// returns the resulting bytes. A write error is printed and the remaining
// elements are still processed.
func bytifyVec(vec []float32) []byte {
	var out bytes.Buffer
	for i := range vec {
		if err := binary.Write(&out, binary.LittleEndian, vec[i]); err != nil {
			fmt.Println(err)
		}
	}
	return out.Bytes()
}
================================================
FILE: error.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
// Constant Error values which can be compared to determine the type of error
const (
	ErrorIndexPathExists Error = iota
	ErrorIndexPathDoesNotExist
	ErrorIndexMetaMissing
	ErrorIndexMetaCorrupt
	ErrorIndexClosed
	ErrorAliasMulti
	ErrorAliasEmpty
	ErrorUnknownIndexType
	ErrorEmptyID
	ErrorIndexReadInconsistency
	ErrorTwoPhaseSearchInconsistency
	ErrorSynonymSearchNotSupported
)

// Error represents a more strongly typed bleve error for detecting
// and handling specific types of errors.
type Error int

// Error returns the message registered for e in errorMessages. Values without
// a registered message yield a generic "unknown error" text instead of an
// empty string, so an out-of-range code never produces a blank error.
func (e Error) Error() string {
	if msg, ok := errorMessages[e]; ok {
		return msg
	}
	return "unknown error"
}

// errorMessages maps every defined Error constant to its message text.
var errorMessages = map[Error]string{
	ErrorIndexPathExists:             "cannot create new index, path already exists",
	ErrorIndexPathDoesNotExist:       "cannot open index, path does not exist",
	ErrorIndexMetaMissing:            "cannot open index, metadata missing",
	ErrorIndexMetaCorrupt:            "cannot open index, metadata corrupt",
	ErrorIndexClosed:                 "index is closed",
	ErrorAliasMulti:                  "cannot perform single index operation on multiple index alias",
	ErrorAliasEmpty:                  "cannot perform operation on empty alias",
	ErrorUnknownIndexType:            "unknown index type",
	ErrorEmptyID:                     "document ID cannot be empty",
	ErrorIndexReadInconsistency:      "index read inconsistency detected",
	ErrorTwoPhaseSearchInconsistency: "2-phase search failed, likely due to an overlapping topology change",
	ErrorSynonymSearchNotSupported:   "synonym search not supported",
}
================================================
FILE: examples_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"os"
"testing"
"time"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/ansi"
)
// Package-level state shared by the example functions in this file.
var (
	// indexMapping is assigned in ExampleNew and used to create exampleIndex.
	indexMapping mapping.IndexMapping
	// exampleIndex is created in ExampleNew and closed in TestMain.
	exampleIndex Index
	// err receives errors from setup and teardown steps.
	err error
)
// TestMain removes any leftover "path_to_index" directory, runs the tests and
// examples, closes the shared example index if one was opened, removes the
// directory again and exits with the test run's status code. Any failure in
// these housekeeping steps panics.
func TestMain(m *testing.M) {
	wipeIndexDir := func() {
		if err = os.RemoveAll("path_to_index"); err != nil {
			panic(err)
		}
	}

	wipeIndexDir()

	exitCode := m.Run()

	if exampleIndex != nil {
		if err = exampleIndex.Close(); err != nil {
			panic(err)
		}
	}
	wipeIndexDir()

	os.Exit(exitCode)
}
// ExampleNew creates the shared example index at "path_to_index" with a fresh
// index mapping and prints its document count.
func ExampleNew() {
	indexMapping = NewIndexMapping()
	if exampleIndex, err = New("path_to_index", indexMapping); err != nil {
		panic(err)
	}

	docs, countErr := exampleIndex.DocCount()
	if countErr != nil {
		panic(countErr)
	}
	fmt.Println(docs)
	// Output:
	// 0
}
// ExampleIndex_indexing indexes two documents into the shared example index
// and prints the resulting document count.
func ExampleIndex_indexing() {
	first := struct {
		Name    string
		Created time.Time
		Age     int
	}{Name: "named one", Created: time.Now(), Age: 50}

	second := struct {
		Name    string
		Created time.Time
		Age     int
	}{Name: "great nameless one", Created: time.Now(), Age: 25}

	// index some data
	if err = exampleIndex.Index("document id 1", first); err != nil {
		panic(err)
	}
	if err = exampleIndex.Index("document id 2", second); err != nil {
		panic(err)
	}

	// 2 documents have been indexed
	docs, countErr := exampleIndex.DocCount()
	if countErr != nil {
		panic(countErr)
	}
	fmt.Println(docs)
	// Output:
	// 2
}
// Examples for query related functions
func ExampleNewMatchQuery() {
// finds documents with fields fully matching the given query text
query := NewMatchQuery("named one")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 1
}
func ExampleNewMatchAllQuery() {
// finds all documents in the index
query := NewMatchAllQuery()
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(len(searchResults.Hits))
// Output:
// 2
}
func ExampleNewMatchNoneQuery() {
// matches no documents in the index
query := NewMatchNoneQuery()
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(len(searchResults.Hits))
// Output:
// 0
}
func ExampleNewMatchPhraseQuery() {
// finds all documents with the given phrase in the index
query := NewMatchPhraseQuery("nameless one")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 2
}
// ExampleNewNumericRangeQuery indexes two documents with Priority 15 and 10,
// then runs a numeric range query from 11 to 100 and prints the ID of the
// first hit.
func ExampleNewNumericRangeQuery() {
	lower, upper := float64(11), float64(100)

	inRange := struct{ Priority float64 }{Priority: float64(15)}
	outOfRange := struct{ Priority float64 }{Priority: float64(10)}

	if err = exampleIndex.Index("document id 3", inRange); err != nil {
		panic(err)
	}
	if err = exampleIndex.Index("document id 4", outOfRange); err != nil {
		panic(err)
	}

	req := NewSearchRequest(NewNumericRangeQuery(&lower, &upper))
	res, searchErr := exampleIndex.Search(req)
	if searchErr != nil {
		panic(searchErr)
	}

	top := res.Hits[0]
	fmt.Println(top.ID)
	// Output:
	// document id 3
}
// ExampleNewNumericRangeInclusiveQuery runs a numeric range query from 10 to
// 100 with both inclusive flags set to false and prints the ID of the first
// hit.
func ExampleNewNumericRangeInclusiveQuery() {
	lower, upper := float64(10), float64(100)
	lowerInclusive, upperInclusive := false, false

	rangeQuery := NewNumericRangeInclusiveQuery(&lower, &upper, &lowerInclusive, &upperInclusive)
	req := NewSearchRequest(rangeQuery)
	res, searchErr := exampleIndex.Search(req)
	if searchErr != nil {
		panic(searchErr)
	}

	top := res.Hits[0]
	fmt.Println(top.ID)
	// Output:
	// document id 3
}
func ExampleNewPhraseQuery() {
// finds all documents with the given phrases in the given field in the index
query := NewPhraseQuery([]string{"nameless", "one"}, "Name")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 2
}
func ExampleNewPrefixQuery() {
// finds all documents with terms having the given prefix in the index
query := NewPrefixQuery("name")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(len(searchResults.Hits))
// Output:
// 2
}
func ExampleNewQueryStringQuery() {
query := NewQueryStringQuery("+one -great")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 1
}
func ExampleNewTermQuery() {
query := NewTermQuery("great")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 2
}
func ExampleNewFacetRequest() {
facet := NewFacetRequest("Name", 1)
query := NewMatchAllQuery()
searchRequest := NewSearchRequest(query)
searchRequest.AddFacet("facet name", facet)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
// total number of terms
fmt.Println(searchResults.Facets["facet name"].Total)
// number of docs with no value for this field
fmt.Println(searchResults.Facets["facet name"].Missing)
// term with highest occurrences in field name
fmt.Println(searchResults.Facets["facet name"].Terms.Terms()[0].Term)
// Output:
// 5
// 2
// one
}
// ExampleFacetRequest_AddDateTimeRange adds a date range facet on the Created
// field, spanning the Unix epoch to now, to a match-all search and prints the
// count of the first date range.
func ExampleFacetRequest_AddDateTimeRange() {
	createdFacet := NewFacetRequest("Created", 1)
	createdFacet.AddDateTimeRange("range name", time.Unix(0, 0), time.Now())

	req := NewSearchRequest(NewMatchAllQuery())
	req.AddFacet("facet name", createdFacet)

	res, searchErr := exampleIndex.Search(req)
	if searchErr != nil {
		panic(searchErr)
	}

	// dates in field Created since starting of unix time till now
	firstRange := res.Facets["facet name"].DateRanges[0]
	fmt.Println(firstRange.Count)
	// Output:
	// 2
}
// ExampleFacetRequest_AddNumericRange adds a numeric range facet on the
// Priority field, with a lower bound of 11 and no upper bound, to a match-all
// search and prints the count of the first numeric range.
func ExampleFacetRequest_AddNumericRange() {
	lower := float64(11)

	priorityFacet := NewFacetRequest("Priority", 1)
	priorityFacet.AddNumericRange("range name", &lower, nil)

	req := NewSearchRequest(NewMatchAllQuery())
	req.AddFacet("facet name", priorityFacet)

	res, searchErr := exampleIndex.Search(req)
	if searchErr != nil {
		panic(searchErr)
	}

	// number documents with field Priority in the given range
	firstRange := res.Facets["facet name"].NumericRanges[0]
	fmt.Println(firstRange.Count)
	// Output:
	// 1
}
func ExampleNewHighlight() {
query := NewMatchQuery("nameless")
searchRequest := NewSearchRequest(query)
searchRequest.Highlight = NewHighlight()
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].Fragments["Name"][0])
// Output:
// great
nameless one
}
// ExampleNewHighlightWithStyle runs a match query for "nameless" with the
// ANSI highlighter and prints the first highlighted fragment of the Name
// field in quoted form.
func ExampleNewHighlightWithStyle() {
	query := NewMatchQuery("nameless")
	searchRequest := NewSearchRequest(query)
	searchRequest.Highlight = NewHighlightWithStyle(ansi.Name)
	searchResults, err := exampleIndex.Search(searchRequest)
	if err != nil {
		panic(err)
	}

	// The fragment contains raw ESC control bytes. Printing it with %q keeps
	// the expected output below printable, so it survives editors and tools
	// that drop or mangle control characters in source files.
	fmt.Printf("%q\n", searchResults.Hits[0].Fragments["Name"][0])
	// Output:
	// "great \x1b[43mnameless\x1b[0m one"
}
// ExampleSearchRequest_AddFacet attaches a size-1 facet on the Name field to a
// match-all search request and prints the facet's total, its missing count
// and its top term.
func ExampleSearchRequest_AddFacet() {
	nameFacetRequest := NewFacetRequest("Name", 1)

	req := NewSearchRequest(NewMatchAllQuery())
	req.AddFacet("facet name", nameFacetRequest)

	res, searchErr := exampleIndex.Search(req)
	if searchErr != nil {
		panic(searchErr)
	}

	nameFacet := res.Facets["facet name"]
	// total number of terms
	fmt.Println(nameFacet.Total)
	// number of docs with no value for this field
	fmt.Println(nameFacet.Missing)
	// term with highest occurrences in field name
	fmt.Println(nameFacet.Terms.Terms()[0].Term)
	// Output:
	// 5
	// 2
	// one
}
func ExampleNewSearchRequest() {
// finds documents with fields fully matching the given query text
query := NewMatchQuery("named one")
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 1
}
func ExampleNewBooleanQuery() {
must := NewMatchQuery("one")
mustNot := NewMatchQuery("great")
query := NewBooleanQuery()
query.AddMust(must)
query.AddMustNot(mustNot)
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 1
}
func ExampleNewConjunctionQuery() {
conjunct1 := NewMatchQuery("great")
conjunct2 := NewMatchQuery("one")
query := NewConjunctionQuery(conjunct1, conjunct2)
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
// Output:
// document id 2
}
func ExampleNewDisjunctionQuery() {
disjunct1 := NewMatchQuery("great")
disjunct2 := NewMatchQuery("named")
query := NewDisjunctionQuery(disjunct1, disjunct2)
searchRequest := NewSearchRequest(query)
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(len(searchResults.Hits))
// Output:
// 2
}
func ExampleSearchRequest_SortBy() {
// find docs containing "one", order by Age instead of score
query := NewMatchQuery("one")
searchRequest := NewSearchRequest(query)
searchRequest.SortBy([]string{"Age"})
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
fmt.Println(searchResults.Hits[1].ID)
// Output:
// document id 2
// document id 1
}
func ExampleSearchRequest_SortByCustom() {
// find all docs, order by Age, with docs missing Age field first
query := NewMatchAllQuery()
searchRequest := NewSearchRequest(query)
searchRequest.SortByCustom(search.SortOrder{
&search.SortField{
Field: "Age",
Missing: search.SortFieldMissingFirst,
},
&search.SortDocID{},
})
searchResults, err := exampleIndex.Search(searchRequest)
if err != nil {
panic(err)
}
fmt.Println(searchResults.Hits[0].ID)
fmt.Println(searchResults.Hits[1].ID)
fmt.Println(searchResults.Hits[2].ID)
fmt.Println(searchResults.Hits[3].ID)
// Output:
// document id 3
// document id 4
// document id 2
// document id 1
}
================================================
FILE: fusion/fusion.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"github.com/blevesearch/bleve/v2/search"
)
// FusionResult is the value returned by the score-fusion functions in this
// package: the fused hits plus two summary values describing them.
type FusionResult struct {
	// Hits is the collection of document matches carrying the fused scores.
	Hits search.DocumentMatchCollection
	// Total is the number of hits reported for the fused result.
	Total uint64
	// MaxScore is the highest fused score observed among the hits.
	MaxScore float64
}
================================================
FILE: fusion/rrf.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"fmt"
"github.com/blevesearch/bleve/v2/search"
)
// formatRRFMessage renders the explanation text for one source's contribution
// to a Reciprocal Rank Fusion score. The weight is printed with three decimal
// places; rank and rankConstant are printed as integers.
func formatRRFMessage(weight float64, rank int, rankConstant int) string {
	const layout = "rrf score (weight=%.3f, rank=%d, rank_constant=%d), normalized score of"
	return fmt.Sprintf(layout, weight, rank, rankConstant)
}
// ReciprocalRankFusion applies Reciprocal Rank Fusion across the primary FTS
// results and each KNN sub-query. Every source ranks the hits independently;
// a hit at 1-based rank r in a source contributes weight/(rankConstant+r) to
// its fused score. Only the first windowSize ranks of each source contribute,
// and at most windowSize hits are returned.
//
// Parameters:
//   - hits: the candidate matches. The collection is re-sorted and the hits are
//     rescored in place. hit.Score is read as the FTS score (0 means "no FTS
//     match") and hit.ScoreBreakdown[i] as the score for KNN query i.
//   - weights: weights[0] scales the FTS source and weights[i+1] scales KNN
//     query i, so it must hold at least numKNNQueries+1 entries.
//   - rankConstant: constant added to the rank in the denominator.
//   - windowSize: number of ranks considered per source and the maximum number
//     of hits returned. A value <= 0 yields an empty result.
//   - numKNNQueries: number of KNN sources present in ScoreBreakdown.
//   - explain: when true, each hit's Expl is rewritten as a "sum of" node whose
//     children describe the per-source contributions. This reads
//     hit.Expl.Children[0] for FTS and hit.Expl.Children[i+1] for KNN query i,
//     so those entries must exist for every contributing hit.
//
// The returned FusionResult holds the hits sorted by fused score (descending),
// their count, and the highest fused score. ScoreBreakdown is cleared on every
// hit.
func ReciprocalRankFusion(hits search.DocumentMatchCollection, weights []float64, rankConstant int, windowSize int, numKNNQueries int, explain bool) *FusionResult {
	nHits := len(hits)
	// A non-positive window admits no hits. Testing "<= 0" rather than "== 0"
	// keeps a negative windowSize from reaching make() and the final reslice,
	// both of which would panic.
	if nHits == 0 || windowSize <= 0 {
		return &FusionResult{
			Hits:     search.DocumentMatchCollection{},
			Total:    0,
			MaxScore: 0.0,
		}
	}

	limit := min(nHits, windowSize)

	// Precompute 1/(rankConstant+rank) for every rank inside the window so the
	// scoring loops below only multiply.
	rankReciprocals := make([]float64, limit)
	for i := range rankReciprocals {
		rankReciprocals[i] = 1.0 / float64(rankConstant+i+1)
	}

	// Explanations are collected per hit and installed once all sources have
	// been processed.
	var fusionExpl map[*search.DocumentMatch][]*search.Explanation
	if explain {
		fusionExpl = make(map[*search.DocumentMatch][]*search.Explanation, nHits)
	}

	// FTS source: rank hits by their current score.
	sortDocMatchesByScore(hits)

	ftsWeight := weights[0]
	for i := 0; i < nHits; i++ {
		if i < windowSize {
			hit := hits[i]
			// Hits are sorted, so a zero score means no FTS scores from this
			// hit onwards.
			if hit.Score == 0.0 {
				break
			}
			contrib := ftsWeight * rankReciprocals[i]
			hit.Score = contrib
			if explain {
				expl := getFusionExplAt(
					hit,
					0,
					contrib,
					formatRRFMessage(ftsWeight, i+1, rankConstant),
				)
				fusionExpl[hit] = append(fusionExpl[hit], expl)
			}
		} else {
			// FTS hits ranked outside the window do not count; drop the raw
			// FTS score so it cannot leak into the fused score.
			hits[i].Score = 0.0
		}
	}

	// KNN sources: rank hits by each query's score breakdown in turn.
	for queryIdx := 0; queryIdx < numKNNQueries; queryIdx++ {
		knnWeight := weights[queryIdx+1]
		// Descending by ScoreBreakdown[queryIdx]; hits without an entry go last.
		sortDocMatchesByBreakdown(hits, queryIdx)

		for i := 0; i < nHits; i++ {
			// Stop at the first hit without a score for this query (the sort
			// puts those at the end) or once the window is exhausted.
			_, scoreBreakdownExists := scoreBreakdownForQuery(hits[i], queryIdx)
			if i >= windowSize || !scoreBreakdownExists {
				break
			}
			hit := hits[i]
			contrib := knnWeight * rankReciprocals[i]
			hit.Score += contrib
			if explain {
				expl := getFusionExplAt(
					hit,
					queryIdx+1,
					contrib,
					formatRRFMessage(knnWeight, i+1, rankConstant),
				)
				fusionExpl[hit] = append(fusionExpl[hit], expl)
			}
		}
	}

	// Finalize: install explanations, drop the per-query breakdown and track
	// the best fused score.
	var maxScore float64
	for _, hit := range hits {
		if explain {
			finalizeFusionExpl(hit, fusionExpl[hit])
		}
		hit.ScoreBreakdown = nil
		if hit.Score > maxScore {
			maxScore = hit.Score
		}
	}

	sortDocMatchesByScore(hits)
	if nHits > windowSize {
		hits = hits[:windowSize]
	}

	return &FusionResult{
		Hits:     hits,
		Total:    uint64(len(hits)),
		MaxScore: maxScore,
	}
}
================================================
FILE: fusion/rrf_test.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"math"
"testing"
"github.com/blevesearch/bleve/v2/search"
)
// epsilon is the absolute tolerance used when comparing floating-point scores
// in the fusion tests.
const epsilon float64 = 1e-3
func nearlyEqual(a float64, b float64, epsilon float64) bool {
return math.Abs(a-b) < epsilon
}
// compareFusionResults reports whether two fusion results match: same Total,
// MaxScore within epsilon, same number of hits, and pairwise equal IDs with
// scores within epsilon. Any hit on either side that still carries a non-nil
// ScoreBreakdown makes the comparison fail.
func compareFusionResults(a, b FusionResult) bool {
	if a.Total != b.Total {
		return false
	}
	if !nearlyEqual(a.MaxScore, b.MaxScore, epsilon) {
		return false
	}
	if len(a.Hits) != len(b.Hits) {
		return false
	}

	for idx, left := range a.Hits {
		right := b.Hits[idx]
		switch {
		case left.ID != right.ID:
			return false
		case !nearlyEqual(left.Score, right.Score, epsilon):
			return false
		case left.ScoreBreakdown != nil, right.ScoreBreakdown != nil:
			return false
		}
	}
	return true
}
// TestReciprocalRankFusion runs ReciprocalRankFusion over a table of hit sets
// (with explanations disabled) and checks the fused ordering, scores, total
// and max score against hand-computed expectations. Before each run the hits
// receive a HitNumber equal to their index in the table so that score ties
// resolve deterministically.
func TestReciprocalRankFusion(t *testing.T) {
	tests := []struct {
		name          string
		hits          search.DocumentMatchCollection
		weights       []float64
		rankConstant  int
		windowSize    int
		numKNNQueries int
		want          FusionResult
	}{
		{
			name:          "empty hits",
			hits:          search.DocumentMatchCollection{},
			weights:       []float64{0.5, 0.5},
			rankConstant:  60,
			windowSize:    10,
			numKNNQueries: 1,
			want: FusionResult{
				Hits:     search.DocumentMatchCollection{},
				Total:    0,
				MaxScore: 0.0,
			},
		},
		{
			name: "single knn query",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.8}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.7}},
			},
			weights:       []float64{0.4, 0.6},
			rankConstant:  1,
			windowSize:    3,
			numKNNQueries: 1,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "b", Score: 0.433},
					{ID: "a", Score: 0.4},
					{ID: "c", Score: 0.25},
				},
				Total:    3,
				MaxScore: 0.433,
			},
		},
		{
			name: "multiple knn queries",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.8, 1: 0.6}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9, 1: 0.5}},
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.7, 1: 0.7}},
			},
			weights:       []float64{0.3, 0.4, 0.3},
			rankConstant:  1,
			windowSize:    3,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 0.383},
					{ID: "b", Score: 0.375},
					{ID: "c", Score: 0.325},
				},
				Total:    3,
				MaxScore: 0.383,
			},
		},
		{
			name: "window size smaller than hits",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.7}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.8}},
			},
			weights:       []float64{0.4, 0.6},
			rankConstant:  1,
			windowSize:    2,
			numKNNQueries: 1,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "b", Score: 0.433},
					{ID: "a", Score: 0.2},
				},
				Total:    2,
				MaxScore: 0.433,
			},
		},
		{
			name: "documents with partial scores missing KNN scores",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.8}},         // has FTS and KNN query 0, missing KNN query 1
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{1: 0.7}},         // has FTS and KNN query 1, missing KNN query 0
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.6, 1: 0.9}}, // has all scores
				{ID: "d", Score: 0.6, ScoreBreakdown: map[int]float64{}},               // has only FTS, missing all KNN scores
			},
			weights:       []float64{0.3, 0.4, 0.3}, // FTS, KNN query 0, KNN query 1
			rankConstant:  1,
			windowSize:    4,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "c", Score: 0.358}, // FTS rank 3, KNN0 rank 2, KNN1 rank 1: 0.3/4 + 0.4/3 + 0.3/2 = 0.075 + 0.133 + 0.15 = 0.358
					{ID: "a", Score: 0.35},  // FTS rank 1, KNN0 rank 1, no KNN1: 0.3/2 + 0.4/2 + 0 = 0.15 + 0.2 + 0 = 0.35
					{ID: "b", Score: 0.2},   // FTS rank 2, no KNN0, KNN1 rank 2: 0.3/3 + 0 + 0.3/3 = 0.1 + 0 + 0.1 = 0.2
					{ID: "d", Score: 0.06},  // FTS rank 4, no KNN0, no KNN1: 0.3/5 + 0 + 0 = 0.06
				},
				Total:    4,
				MaxScore: 0.358,
			},
		},
		{
			name: "documents with only KNN scores",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.0, ScoreBreakdown: map[int]float64{0: 0.9}},         // no FTS rank (Score 0.0), only KNN query 0
				{ID: "b", Score: 0.0, ScoreBreakdown: map[int]float64{1: 0.8}},         // no FTS rank (Score 0.0), only KNN query 1
				{ID: "c", Score: 0.0, ScoreBreakdown: map[int]float64{0: 0.7, 1: 0.6}}, // no FTS rank (Score 0.0), both KNN queries
			},
			weights:       []float64{0.5, 0.3, 0.2}, // FTS, KNN query 0, KNN query 1
			rankConstant:  1,
			windowSize:    3,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "c", Score: 0.167}, // no FTS rank, KNN0 rank 2, KNN1 rank 2: 0 + 0.3/3 + 0.2/3 = 0 + 0.1 + 0.067 = 0.167
					{ID: "a", Score: 0.15},  // no FTS rank, KNN0 rank 1, no KNN1: 0 + 0.3/2 + 0 = 0 + 0.15 + 0 = 0.15
					{ID: "b", Score: 0.1},   // no FTS rank, no KNN0, KNN1 rank 1: 0 + 0 + 0.2/2 = 0 + 0 + 0.1 = 0.1
				},
				Total:    3,
				MaxScore: 0.167,
			},
		},
		{
			name: "mixed scenario with gaps in KNN queries",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.8, ScoreBreakdown: map[int]float64{1: 0.9}}, // has FTS and KNN query 1, missing KNN query 0
				{ID: "b", Score: 0.6, ScoreBreakdown: map[int]float64{0: 0.8}}, // has FTS and KNN query 0, missing KNN query 1
				{ID: "c", Score: 0.0, ScoreBreakdown: map[int]float64{0: 0.7}}, // no FTS rank (Score 0.0), only KNN query 0
				{ID: "d", Score: 0.4, ScoreBreakdown: map[int]float64{}},       // only FTS, no KNN scores
			},
			weights:       []float64{0.4, 0.3, 0.3}, // FTS, KNN query 0, KNN query 1
			rankConstant:  1,
			windowSize:    4,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 0.35},  // FTS rank 1, no KNN0, KNN1 rank 1: 0.4/2 + 0 + 0.3/2 = 0.2 + 0 + 0.15 = 0.35
					{ID: "b", Score: 0.283}, // FTS rank 2, KNN0 rank 1, no KNN1: 0.4/3 + 0.3/2 + 0 = 0.133 + 0.15 + 0 = 0.283
					{ID: "d", Score: 0.1},   // FTS rank 3, no KNN0, no KNN1: 0.4/4 + 0 + 0 = 0.1
					{ID: "c", Score: 0.1},   // no FTS rank, KNN0 rank 2, no KNN1: 0 + 0.3/3 + 0 = 0 + 0.1 + 0 = 0.1
				},
				Total:    4,
				MaxScore: 0.35,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			// HitNumber is the tie-breaker used by the fusion sort helpers.
			for i, hit := range tt.hits {
				hit.HitNumber = uint64(i)
			}

			got := ReciprocalRankFusion(tt.hits, tt.weights, tt.rankConstant, tt.windowSize, tt.numKNNQueries, false)
			if !compareFusionResults(*got, tt.want) {
				t.Errorf("ReciprocalRankFusion() = %v, want %v", got, tt.want)

				// The hits are pointers, so %v above shows addresses only;
				// log the fields that compareFusionResults actually checks.
				t.Logf("Got hits:")
				for i, hit := range got.Hits {
					t.Logf(" [%d] ID: %s, Score: %.6f", i, hit.ID, hit.Score)
				}
				t.Logf("Want hits:")
				for i, hit := range tt.want.Hits {
					t.Logf(" [%d] ID: %s, Score: %.6f", i, hit.ID, hit.Score)
				}
				t.Logf("Got Total: %d, MaxScore: %.6f", got.Total, got.MaxScore)
				t.Logf("Want Total: %d, MaxScore: %.6f", tt.want.Total, tt.want.MaxScore)
			}
		})
	}
}
================================================
FILE: fusion/rsf.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"fmt"
"github.com/blevesearch/bleve/v2/search"
)
// formatRSFMessage renders the explanation text for one source's contribution
// to a Relative Score Fusion score. The weight is printed with three decimal
// places; the normalized score and the min/max bounds with six.
func formatRSFMessage(weight float64, normalizedScore float64, minScore float64, maxScore float64) string {
	const layout = "rsf score (weight=%.3f, normalized=%.6f, min=%.6f, max=%.6f), normalized score of"
	return fmt.Sprintf(layout, weight, normalizedScore, minScore, maxScore)
}
// RelativeScoreFusion min-max normalizes the best-scoring documents from the
// primary FTS query and from each KNN query, scales the normalized values by
// the supplied weights, and sums them into a single fused score per hit. Only
// the top windowSize documents of each source contribute, and at most
// windowSize hits are returned.
//
// Parameters:
//   - hits: the candidate matches. The collection is re-sorted and the hits are
//     rescored in place. hit.Score is read as the FTS score (0 means "no FTS
//     match") and hit.ScoreBreakdown[i] as the score for KNN query i.
//   - weights: weights[0] scales the FTS source and weights[i+1] scales KNN
//     query i, so it must hold at least numKNNQueries+1 entries.
//   - windowSize: number of documents considered per source and the maximum
//     number of hits returned. A value <= 0 yields an empty result.
//   - numKNNQueries: number of KNN sources present in ScoreBreakdown.
//   - explain: when true, each hit's Expl is rewritten as a "sum of" node whose
//     children describe the per-source contributions. This reads
//     hit.Expl.Children[0] for FTS and hit.Expl.Children[i+1] for KNN query i,
//     so those entries must exist for every contributing hit.
//
// Within a source, the normalized score is (score-min)/(max-min) over the
// documents inside the window; when all of them share one score the
// normalized value is 1.0.
//
// The returned FusionResult holds the hits sorted by fused score (descending),
// their count, and the highest fused score. ScoreBreakdown is cleared on every
// hit.
func RelativeScoreFusion(hits search.DocumentMatchCollection, weights []float64, windowSize int, numKNNQueries int, explain bool) *FusionResult {
	nHits := len(hits)
	// A non-positive window admits no hits. Testing "<= 0" rather than "== 0"
	// keeps a negative windowSize from producing negative limits, which would
	// panic when indexing hits[limit-1] or reslicing hits[:windowSize].
	if nHits == 0 || windowSize <= 0 {
		return &FusionResult{
			Hits:     search.DocumentMatchCollection{},
			Total:    0,
			MaxScore: 0.0,
		}
	}

	// Explanations are collected per hit and installed once all sources have
	// been processed.
	var fusionExpl map[*search.DocumentMatch][]*search.Explanation
	if explain {
		fusionExpl = make(map[*search.DocumentMatch][]*search.Explanation, nHits)
	}

	// FTS source: rank hits by their current score.
	sortDocMatchesByScore(hits)

	// ftsLimit is the number of FTS hits that take part in the fusion: the
	// leading run of non-zero scores, capped at the window size.
	ftsLimit := 0
	for _, hit := range hits {
		if hit.Score == 0.0 {
			break
		}
		ftsLimit++
	}
	ftsLimit = min(ftsLimit, windowSize)

	if ftsLimit > 0 {
		// Bounds of the FTS window; hits are sorted descending. The locals are
		// deliberately not called min/max so the builtins stay usable.
		ftsMax := hits[0].Score
		ftsMin := hits[ftsLimit-1].Score
		denom := ftsMax - ftsMin
		weight := weights[0]

		for i := 0; i < ftsLimit; i++ {
			hit := hits[i]
			norm := 1.0
			if denom > 0 {
				norm = (hit.Score - ftsMin) / denom
			}
			contrib := weight * norm
			if explain {
				// The node value is the weighted contribution so that the
				// children add up to the "sum of" root installed later.
				expl := getFusionExplAt(
					hit,
					0,
					contrib,
					formatRSFMessage(weight, norm, ftsMin, ftsMax),
				)
				fusionExpl[hit] = append(fusionExpl[hit], expl)
			}
			hit.Score = contrib
		}
		for i := ftsLimit; i < nHits; i++ {
			// FTS hits outside the window do not count; drop the raw FTS
			// score so it cannot leak into the fused score.
			hits[i].Score = 0.0
		}
	}

	// KNN sources: rank hits by each query's score breakdown in turn.
	for queryIdx := 0; queryIdx < numKNNQueries; queryIdx++ {
		// Descending by ScoreBreakdown[queryIdx]; hits without an entry go last.
		sortDocMatchesByBreakdown(hits, queryIdx)

		// knnLimit is the number of hits carrying a score for this query,
		// capped at the window size.
		knnLimit := 0
		for _, hit := range hits {
			if _, ok := scoreBreakdownForQuery(hit, queryIdx); !ok {
				break
			}
			knnLimit++
		}
		knnLimit = min(knnLimit, windowSize)

		// Nothing to fuse for this query.
		if knnLimit == 0 {
			continue
		}

		knnMax, _ := scoreBreakdownForQuery(hits[0], queryIdx)
		knnMin, _ := scoreBreakdownForQuery(hits[knnLimit-1], queryIdx)
		denom := knnMax - knnMin
		weight := weights[queryIdx+1]

		for i := 0; i < knnLimit; i++ {
			hit := hits[i]
			score, _ := scoreBreakdownForQuery(hit, queryIdx)
			norm := 1.0
			if denom > 0 {
				norm = (score - knnMin) / denom
			}
			contrib := weight * norm
			if explain {
				expl := getFusionExplAt(
					hit,
					queryIdx+1,
					contrib,
					formatRSFMessage(weight, norm, knnMin, knnMax),
				)
				fusionExpl[hit] = append(fusionExpl[hit], expl)
			}
			hit.Score += contrib
		}
	}

	// Finalize: install explanations, track the best fused score and drop the
	// per-query breakdown.
	var maxScore float64
	for _, hit := range hits {
		if explain {
			finalizeFusionExpl(hit, fusionExpl[hit])
		}
		if hit.Score > maxScore {
			maxScore = hit.Score
		}
		hit.ScoreBreakdown = nil
	}

	sortDocMatchesByScore(hits)
	if nHits > windowSize {
		hits = hits[:windowSize]
	}

	return &FusionResult{
		Hits:     hits,
		Total:    uint64(len(hits)),
		MaxScore: maxScore,
	}
}
================================================
FILE: fusion/rsf_test.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"testing"
"github.com/blevesearch/bleve/v2/search"
)
// TestRelativeScoreFusion runs RelativeScoreFusion over a table of hit sets
// (with explanations disabled) and checks the fused ordering, scores, total
// and max score against hand-computed expectations. Before each run the hits
// receive a HitNumber equal to their index in the table so that score ties
// resolve deterministically.
func TestRelativeScoreFusion(t *testing.T) {
	cases := []struct {
		name          string
		hits          search.DocumentMatchCollection
		weights       []float64
		windowSize    int
		numKNNQueries int
		want          FusionResult
	}{
		{
			name:          "empty hits",
			hits:          search.DocumentMatchCollection{},
			weights:       []float64{0.5, 0.5},
			windowSize:    10,
			numKNNQueries: 1,
			want: FusionResult{
				Hits:     search.DocumentMatchCollection{},
				Total:    0,
				MaxScore: 0.0,
			},
		},
		{
			name: "single knn query",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.8}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.7}},
			},
			weights:       []float64{0.4, 0.6},
			windowSize:    3,
			numKNNQueries: 1,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "b", Score: 0.8}, // FTS: (0.8-0.7)/(0.9-0.7) * 0.4 + KNN: (0.9-0.7)/(0.9-0.7) * 0.6 = 0.2 + 0.6 = 0.8
					{ID: "a", Score: 0.7}, // FTS: (0.9-0.7)/(0.9-0.7) * 0.4 + KNN: (0.8-0.7)/(0.9-0.7) * 0.6 = 0.4 + 0.3 = 0.7
					{ID: "c", Score: 0.0}, // FTS: (0.7-0.7)/(0.9-0.7) * 0.4 + KNN: (0.7-0.7)/(0.9-0.7) * 0.6 = 0.0 + 0.0 = 0.0
				},
				Total:    3,
				MaxScore: 0.8,
			},
		},
		{
			name: "multiple knn queries",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.8, 1: 0.6}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9, 1: 0.5}},
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.7, 1: 0.7}},
			},
			weights:       []float64{0.3, 0.4, 0.3},
			windowSize:    3,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 0.65}, // FTS: (0.9-0.7)/(0.9-0.7)*0.3 + KNN0: (0.8-0.7)/(0.9-0.7)*0.4 + KNN1: (0.6-0.5)/(0.7-0.5)*0.3 = 1.0*0.3 + 0.5*0.4 + 0.5*0.3 = 0.65
					{ID: "b", Score: 0.55}, // FTS: (0.8-0.7)/(0.9-0.7)*0.3 + KNN0: (0.9-0.7)/(0.9-0.7)*0.4 + KNN1: (0.5-0.5)/(0.7-0.5)*0.3 = 0.5*0.3 + 1.0*0.4 + 0.0*0.3 = 0.55
					{ID: "c", Score: 0.3},  // FTS: (0.7-0.7)/(0.9-0.7)*0.3 + KNN0: (0.7-0.7)/(0.9-0.7)*0.4 + KNN1: (0.7-0.5)/(0.7-0.5)*0.3 = 0.0*0.3 + 0.0*0.4 + 1.0*0.3 = 0.3
				},
				Total:    3,
				MaxScore: 0.65,
			},
		},
		{
			name: "all scores identical should normalize to 1.0",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
				{ID: "c", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
			},
			weights:       []float64{0.4, 0.6},
			windowSize:    3,
			numKNNQueries: 1,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 1.0}, // All scores identical: 1.0 * 0.4 + 1.0 * 0.6 = 1.0
					{ID: "b", Score: 1.0},
					{ID: "c", Score: 1.0},
				},
				Total:    3,
				MaxScore: 1.0,
			},
		},
		{
			name: "window size smaller than hits",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.7}},
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{0: 0.9}},
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.8}},
			},
			weights:       []float64{0.4, 0.6},
			windowSize:    2,
			numKNNQueries: 1,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "b", Score: 0.6}, // FTS window [0.9,0.8]: (0.8-0.8)/(0.9-0.8) * 0.4 = 0; KNN window [0.9,0.8]: (0.9-0.8)/(0.9-0.8) * 0.6 = 0.6
					{ID: "a", Score: 0.4}, // FTS: (0.9-0.8)/(0.9-0.8) * 0.4 = 0.4; KNN score 0.7 falls outside the KNN window, so it adds 0
				},
				Total:    2,
				MaxScore: 0.6,
			},
		},
		{
			name: "documents with partial scores missing KNN scores",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.9, ScoreBreakdown: map[int]float64{0: 0.8}},         // has FTS and KNN query 0, missing KNN query 1
				{ID: "b", Score: 0.8, ScoreBreakdown: map[int]float64{1: 0.7}},         // has FTS and KNN query 1, missing KNN query 0
				{ID: "c", Score: 0.7, ScoreBreakdown: map[int]float64{0: 0.6, 1: 0.9}}, // has all scores
				{ID: "d", Score: 0.6, ScoreBreakdown: map[int]float64{}},               // has only FTS, missing all KNN scores
			},
			weights:       []float64{0.3, 0.4, 0.3}, // FTS, KNN query 0, KNN query 1
			windowSize:    4,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 0.7}, // FTS: (0.9-0.6)/(0.9-0.6)*0.3 + KNN0: (0.8-0.6)/(0.8-0.6)*0.4 + KNN1: 0 = 1.0*0.3 + 1.0*0.4 + 0 = 0.7
					{ID: "c", Score: 0.4}, // FTS: (0.7-0.6)/(0.9-0.6)*0.3 + KNN0: (0.6-0.6)/(0.8-0.6)*0.4 + KNN1: (0.9-0.7)/(0.9-0.7)*0.3 = 0.33*0.3 + 0.0*0.4 + 1.0*0.3 = 0.1 + 0 + 0.3 = 0.4
					{ID: "b", Score: 0.2}, // FTS: (0.8-0.6)/(0.9-0.6)*0.3 + KNN0: 0 + KNN1: (0.7-0.7)/(0.9-0.7)*0.3 = 0.67*0.3 + 0 + 0.0*0.3 = 0.2 + 0 + 0 = 0.2
					{ID: "d", Score: 0.0}, // FTS: (0.6-0.6)/(0.9-0.6)*0.3 + KNN0: 0 + KNN1: 0 = 0.0*0.3 + 0 + 0 = 0
				},
				Total:    4,
				MaxScore: 0.7,
			},
		},
		{
			name: "documents with only KNN scores",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 0.0, ScoreBreakdown: map[int]float64{0: 0.9}},         // no FTS rank (Score 0.0), only KNN query 0
				{ID: "b", Score: 0.0, ScoreBreakdown: map[int]float64{1: 0.8}},         // no FTS rank (Score 0.0), only KNN query 1
				{ID: "c", Score: 0.0, ScoreBreakdown: map[int]float64{0: 0.7, 1: 0.6}}, // no FTS rank (Score 0.0), both KNN queries
			},
			weights:       []float64{0.5, 0.3, 0.2}, // FTS, KNN query 0, KNN query 1
			windowSize:    3,
			numKNNQueries: 2,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 0.3}, // FTS: 0 + KNN0: 1.0 * 0.3 + KNN1: 0 = 0.3
					{ID: "b", Score: 0.2}, // FTS: 0 + KNN0: 0 + KNN1: 1.0 * 0.2 = 0.2
					{ID: "c", Score: 0.0}, // FTS: 0 + KNN0: 0 * 0.3 + KNN1: 0 * 0.2 = 0
				},
				Total:    3,
				MaxScore: 0.3,
			},
		},
		{
			name: "mixed scenario with different score ranges",
			hits: search.DocumentMatchCollection{
				{ID: "a", Score: 1.0, ScoreBreakdown: map[int]float64{0: 0.1}}, // high FTS, low KNN
				{ID: "b", Score: 0.1, ScoreBreakdown: map[int]float64{0: 1.0}}, // low FTS, high KNN
				{ID: "c", Score: 0.5, ScoreBreakdown: map[int]float64{0: 0.5}}, // mid FTS, mid KNN
			},
			weights:       []float64{0.5, 0.5}, // Equal weights
			windowSize:    3,
			numKNNQueries: 1,
			want: FusionResult{
				Hits: search.DocumentMatchCollection{
					{ID: "a", Score: 0.5},   // FTS: (1.0-0.1)/(1.0-0.1)*0.5 + KNN: (0.1-0.1)/(1.0-0.1)*0.5 = 1.0*0.5 + 0.0*0.5 = 0.5
					{ID: "b", Score: 0.5},   // FTS: (0.1-0.1)/(1.0-0.1)*0.5 + KNN: (1.0-0.1)/(1.0-0.1)*0.5 = 0.0*0.5 + 1.0*0.5 = 0.5
					{ID: "c", Score: 0.444}, // FTS: (0.5-0.1)/(1.0-0.1)*0.5 + KNN: (0.5-0.1)/(1.0-0.1)*0.5 = 0.444*0.5 + 0.444*0.5 = 0.444
				},
				Total:    3,
				MaxScore: 0.5,
			},
		},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			// HitNumber is the tie-breaker used by the fusion sort helpers.
			for idx := range tc.hits {
				tc.hits[idx].HitNumber = uint64(idx)
			}

			got := RelativeScoreFusion(tc.hits, tc.weights, tc.windowSize, tc.numKNNQueries, false)
			if compareFusionResults(*got, tc.want) {
				return
			}

			t.Errorf("RelativeScoreFusion() = %v, want %v", got, tc.want)

			// Spell out both sides hit by hit to make the mismatch readable.
			t.Logf("Got hits:")
			for pos, hit := range got.Hits {
				t.Logf(" [%d] ID: %s, Score: %.6f", pos, hit.ID, hit.Score)
			}
			t.Logf("Want hits:")
			for pos, hit := range tc.want.Hits {
				t.Logf(" [%d] ID: %s, Score: %.6f", pos, hit.ID, hit.Score)
			}
			t.Logf("Got Total: %d, MaxScore: %.6f", got.Total, got.MaxScore)
			t.Logf("Want Total: %d, MaxScore: %.6f", tc.want.Total, tc.want.MaxScore)
		})
	}
}
================================================
FILE: fusion/util.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package fusion
import (
"sort"
"github.com/blevesearch/bleve/v2/search"
)
// sortDocMatchesByScore sorts hits in place from highest to lowest Score.
// Hits with equal scores are ordered by ascending HitNumber, which keeps the
// result deterministic as long as hit numbers are distinct.
func sortDocMatchesByScore(hits search.DocumentMatchCollection) {
	if len(hits) <= 1 {
		return
	}

	byScoreDesc := func(a, b int) bool {
		left, right := hits[a], hits[b]
		if left.Score != right.Score {
			return left.Score > right.Score
		}
		return left.HitNumber < right.HitNumber
	}
	sort.Slice(hits, byScoreDesc)
}
// scoreBreakdownForQuery looks up the entry stored under idx in the hit's
// ScoreBreakdown map. It returns the score and true when the entry exists, and
// 0 and false when the hit is nil, has no breakdown map, or has no entry for
// idx.
func scoreBreakdownForQuery(hit *search.DocumentMatch, idx int) (float64, bool) {
	if hit != nil && hit.ScoreBreakdown != nil {
		score, found := hit.ScoreBreakdown[idx]
		return score, found
	}
	return 0, false
}
// sortDocMatchesByBreakdown sorts hits in place by the score stored under
// queryIdx in each hit's ScoreBreakdown, highest first.
//
// Ordering rules, in priority order:
//  1. Hits that have a score for queryIdx come before hits that do not.
//  2. Among scored hits, the higher score comes first.
//  3. Ties (equal scores, or both hits unscored) are broken by ascending
//     HitNumber.
//
// Nil entries are treated as unscored and placed after every non-nil hit, so
// the comparator never dereferences a nil hit.
func sortDocMatchesByBreakdown(hits search.DocumentMatchCollection, queryIdx int) {
	if len(hits) < 2 {
		return
	}

	// lookup returns the score for queryIdx and whether it is present; nil
	// hits and hits without a breakdown map report "not present".
	lookup := func(hit *search.DocumentMatch) (float64, bool) {
		if hit == nil || hit.ScoreBreakdown == nil {
			return 0, false
		}
		score, ok := hit.ScoreBreakdown[queryIdx]
		return score, ok
	}

	sort.SliceStable(hits, func(a, b int) bool {
		left, right := hits[a], hits[b]
		leftScore, leftOK := lookup(left)
		rightScore, rightOK := lookup(right)

		// Exactly one side has a score: the scored hit goes first.
		if leftOK != rightOK {
			return leftOK
		}
		// Both scored with different scores: higher score goes first.
		if leftOK && leftScore != rightScore {
			return leftScore > rightScore
		}
		// Tie-break. Guard the HitNumber access: the lookups above tolerate
		// nil hits, so the fallback must as well. Nil hits sort last.
		if left == nil || right == nil {
			return left != nil
		}
		return left.HitNumber < right.HitNumber
	})
}
// getFusionExplAt builds a new explanation node with the given value and
// message whose only child is the hit's existing explanation child at index i.
// The child is referenced, not cloned. The hit must carry an explanation with
// at least i+1 children.
func getFusionExplAt(hit *search.DocumentMatch, i int, value float64, message string) *search.Explanation {
	child := hit.Expl.Children[i]
	wrapped := &search.Explanation{Value: value, Message: message}
	wrapped.Children = []*search.Explanation{child}
	return wrapped
}
// finalizeFusionExpl rewrites the hit's root explanation in place: its message
// becomes "sum of", its value becomes the hit's current Score, and its
// children are replaced by explChildren.
func finalizeFusionExpl(hit *search.DocumentMatch, explChildren []*search.Explanation) {
	root := hit.Expl
	root.Message = "sum of"
	root.Value = hit.Score
	root.Children = explChildren
}
================================================
FILE: geo/README.md
================================================
# Geo spatial search support in bleve
Latest bleve spatial capabilities are powered by spatial hierarchical tokens generated from s2geometry.
You can find more details about the [s2geometry basics here](http://s2geometry.io/), and explore the extended functionality of our forked golang port of [s2geometry lib here](https://github.com/blevesearch/geo).
Users can continue to index and query `geopoint` field type and the existing queries like,
- Point Distance
- Bounded Rectangle
- Bounded Polygon
as before.
## New Spatial Field Type - geoshape
We have introduced a field type (`geoshape`) for representing the new spatial types.
Using the new `geoshape` field type, users can unlock the spatial capabilities
for the [geojson](https://datatracker.ietf.org/doc/html/rfc7946) shapes like,
- Point
- LineString
- Polygon
- MultiPoint
- MultiLineString
- MultiPolygon
- GeometryCollection
In addition to these shapes, bleve will also support additional shapes like,
- Circle
- Envelope (Bounded box)
To specify GeoJSON data, use a nested field with:
- a field named type that specifies the GeoJSON object type and the type value will be case-insensitive.
- a field named coordinates that specifies the object's coordinates.
```text
"fieldName": {
"type": "GeoJSON Type",
"coordinates": <coordinates>
}
```
- If specifying latitude and longitude coordinates, list the longitude first and then latitude.
- Valid longitude values are between -180 and 180, both inclusive.
- Valid latitude values are between -90 and 90, both inclusive.
- Shapes would be internally represented as geodesics.
- The GeoJSON specification strongly suggests splitting geometries so that neither of their parts crosses the antimeridian.
Examples for the various geojson shapes representations are as below.
## Point
The following specifies a [Point](https://tools.ietf.org/html/rfc7946#section-3.1.2) field in a document:
```json
{
"type": "point",
"coordinates": [75.05687713623047, 22.53539059204079]
}
```
## Linestring
The following specifies a [Linestring](https://tools.ietf.org/html/rfc7946#section-3.1.4) field in a document:
```json
{
"type": "linestring",
"coordinates": [
[77.01416015625, 23.0797317624497],
[78.134765625, 20.385825381874263]
]
}
```
## Polygon
The following specifies a [Polygon](https://tools.ietf.org/html/rfc7946#section-3.1.6) field in a document:
```json
{
"type": "polygon",
"coordinates": [
[
[85.605, 57.207],
[86.396, 55.998],
[87.033, 56.716],
[85.605, 57.207]
]
]
}
```
The first and last coordinates must match in order to close the polygon.
And the exterior coordinates have to be in Counter Clockwise Order in a polygon. (CCW)
## MultiPoint
The following specifies a [Multipoint](https://tools.ietf.org/html/rfc7946#section-3.1.3) field in a document:
```json
{
"type": "multipoint",
"coordinates": [
[-115.8343505859375, 38.45789034424927],
[-115.81237792968749, 38.19502155795575],
[-120.80017089843749, 36.54053616262899],
[-120.67932128906249, 36.33725319397006]
]
}
```
## MultiLineString
The following specifies a [MultiLineString](https://tools.ietf.org/html/rfc7946#section-3.1.5) field in a document:
```json
{
"type": "multilinestring",
"coordinates": [
[
[-118.31726074, 35.250105158],
[-117.509765624, 35.3756141]
],
[
[-118.696289, 34.624167789],
[-118.317260742, 35.03899204]
],
[
[-117.9492187, 35.146862906],
[-117.6745605, 34.41144164]
]
]
}
```
## MultiPolygon
The following specifies a [MultiPolygon](https://tools.ietf.org/html/rfc7946#section-3.1.7) field in a document:
```json
{
"type": "multipolygon",
"coordinates": [
[
[
[-73.958, 40.8003],
[-73.9498, 40.7968],
[-73.9737, 40.7648],
[-73.9814, 40.7681],
[-73.958, 40.8003]
]
],
[
[
[-73.958, 40.8003],
[-73.9498, 40.7968],
[-73.9737, 40.7648],
[-73.958, 40.8003]
]
]
]
}
```
## GeometryCollection
The following specifies a [GeometryCollection](https://tools.ietf.org/html/rfc7946#section-3.1.8) field in a document:
```json
{
"type": "geometrycollection",
"geometries": [
{
"type": "multipoint",
"coordinates": [
[-73.958, 40.8003],
[-73.9498, 40.7968],
[-73.9737, 40.7648],
[-73.9814, 40.7681]
]
},
{
"type": "multilinestring",
"coordinates": [
[
[-73.96943, 40.78519],
[-73.96082, 40.78095]
],
[
[-73.96415, 40.79229],
[-73.95544, 40.78854]
],
[
[-73.97162, 40.78205],
[-73.96374, 40.77715]
],
[
[-73.9788, 40.77247],
[-73.97036, 40.76811]
]
]
},
{
"type": "polygon",
"coordinates": [
[
[0, 0],
[3, 6],
[6, 1],
[0, 0]
],
[
[2, 2],
[3, 3],
[4, 2],
[2, 2]
]
]
}
]
}
```
## Circle
If the user wishes to cover a circular region over the earth's surface, then they could use this shape.
A sample circular shape is as below.
```json
{
"type": "circle",
"coordinates": [75.05687713623047, 22.53539059204079],
"radius": "1000m"
}
```
Circle is specified over the center point coordinates along with the radius.
Example formats supported for radius are:
"5in" , "5inch" , "7yd" , "7yards", "9ft" , "9feet", "11km", "11kilometers", "3nm", "3nauticalmiles", "13mm" , "13millimeters", "15cm", "15centimeters", "17mi", "17miles", "19m" or "19meters".
If the unit cannot be determined, the entire string is parsed and the unit of meters is assumed.
## Envelope
The Envelope type consists of the coordinates of the upper-left and lower-right points of the shape, and represents a bounding rectangle in the format [[minLon, maxLat], [maxLon, minLat]].
```json
{
"type": "envelope",
"coordinates": [
[72.83, 18.979],
[78.508, 17.4555]
]
}
```
## GeoShape Query
The geoshape query supports three types/filters of spatial querying capability across the heterogeneous types of documents indexed.
### Query Structure
```json
{
"query": {
"geometry": {
"shape": {
"type": "<shapeType>",
"coordinates": [
[[]]
]
},
"relation": "<filterName>"
}
}
}
```
*shapeType* => can be any of the aforementioned types like Point, LineString, Polygon, MultiPoint,
Geometrycollection, MultiLineString, MultiPolygon, Circle and Envelope.
*filterName* => can be any of the 3 types like *intersects*, *contains* and *within*.
### Relation
| FilterName | Description |
| :-----------:| :-----------------------------------------------------------------: |
| `intersects` | Return all documents whose shape field intersects the query geometry. |
| `contains` | Return all documents whose shape field contains the query geometry |
| `within` | Return all documents whose shape field is within the query geometry. |
------------------------------------------------------------------------------------------------------------------------
### Older Implementation
First, all of this geo code is a Go adaptation of the [Lucene 5.3.2 sandbox geo support](https://lucene.apache.org/core/5_3_2/sandbox/org/apache/lucene/util/package-summary.html).
## Notes
- All of the APIs will use float64 for lon/lat values.
- When describing a point in function arguments or return values, we always use the order lon, lat.
- High level APIs will use TopLeft and BottomRight to describe bounding boxes. This may not map cleanly to min/max lon/lat when crossing the dateline. The lower level APIs will use min/max lon/lat and require the higher-level code to split boxes accordingly.
- Points and MultiPoints may only contain Points and MultiPoints.
- LineStrings and MultiLineStrings may only contain Points and MultiPoints.
- Polygons or MultiPolygons intersecting Polygons and MultiPolygons may return arbitrary results when the overlap is only an edge or a vertex.
- Circles containing a polygon will return a false positive result if all of the vertices of the polygon are within the circle, but the orientation of those points is clockwise.
- The edges of an Envelope follow the latitude and longitude lines instead of the shortest path on a globe.
- Envelope intersecting queries with LineStrings, MultiLineStrings, Polygons and MultiPolygons implicitly convert the Envelope into a Polygon, which changes the curvature of the edges and causes inaccurate results for a few edge cases.
================================================
FILE: geo/benchmark_geohash_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"testing"
)
// BenchmarkGeoHashLen5NewDecode measures DecodeGeoHash on a 5-character
// geohash.
func BenchmarkGeoHashLen5NewDecode(b *testing.B) {
	const geohash = "d3hn3"
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_, _ = DecodeGeoHash(geohash)
	}
}
// BenchmarkGeoHashLen6NewDecode measures DecodeGeoHash on a 6-character
// geohash.
func BenchmarkGeoHashLen6NewDecode(b *testing.B) {
	const geohash = "u4pruy"
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_, _ = DecodeGeoHash(geohash)
	}
}
// BenchmarkGeoHashLen7NewDecode measures DecodeGeoHash on a 7-character
// geohash.
func BenchmarkGeoHashLen7NewDecode(b *testing.B) {
	const geohash = "u4pruyd"
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		_, _ = DecodeGeoHash(geohash)
	}
}
================================================
FILE: geo/geo.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"github.com/blevesearch/bleve/v2/numeric"
)
// GeoBits is the number of bits used for a single geo point
// Currently this is 32bits for lon and 32bits for lat
var GeoBits uint = 32

// Bounds of the valid longitude/latitude range, in degrees.
var (
	minLon = -180.0
	minLat = -90.0
	maxLon = 180.0
	maxLat = 90.0
)

// The same bounds converted to radians.
var (
	minLonRad = minLon * degreesToRadian
	minLatRad = minLat * degreesToRadian
	maxLonRad = maxLon * degreesToRadian
	maxLatRad = maxLat * degreesToRadian
)

// geoTolerance is the absolute difference at or below which compareGeo
// treats two values as equal.
var geoTolerance = 1e-6
var lonScale = float64((uint64(0x1)<> 1))
}
// unscaleLon converts a scaled longitude back to a float64 by dividing it by
// lonScale and then offsetting the result by minLon.
func unscaleLon(lon uint64) float64 {
	scaled := float64(lon) / lonScale
	return scaled + minLon
}
// unscaleLat converts a scaled latitude back to a float64 by dividing it by
// latScale and then offsetting the result by minLat.
func unscaleLat(lat uint64) float64 {
	scaled := float64(lat) / latScale
	return scaled + minLat
}
// compareGeo returns the signed difference a - b, except that a difference
// whose magnitude is at most geoTolerance is reported as exactly 0 (the two
// values are considered equal).
func compareGeo(a, b float64) float64 {
	diff := a - b
	withinTolerance := math.Abs(diff) <= geoTolerance
	if !withinTolerance {
		return diff
	}
	return 0
}
// RectIntersects checks whether rectangles a and b intersect. Rectangles that
// merely touch along an edge or at a corner count as intersecting.
func RectIntersects(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	// The rectangles miss each other as soon as they are separated on
	// either axis.
	separatedX := aMaxX < bMinX || aMinX > bMaxX
	separatedY := aMaxY < bMinY || aMinY > bMaxY
	return !separatedX && !separatedY
}
// RectWithin checks whether box a is within box b. Boxes sharing an edge still
// count as within.
func RectWithin(aMinX, aMinY, aMaxX, aMaxY, bMinX, bMinY, bMaxX, bMaxY float64) bool {
	// a escapes b if it sticks out on either axis.
	escapesX := aMinX < bMinX || aMaxX > bMaxX
	escapesY := aMinY < bMinY || aMaxY > bMaxY
	return !(escapesX || escapesY)
}
// BoundingBoxContains checks whether the lon/lat point is within the box.
// The box edges are inclusive and every comparison goes through compareGeo,
// so points within the geo tolerance of an edge are treated as on the edge.
func BoundingBoxContains(lon, lat, minLon, minLat, maxLon, maxLat float64) bool {
	lonInside := compareGeo(lon, minLon) >= 0 && compareGeo(lon, maxLon) <= 0
	latInside := compareGeo(lat, minLat) >= 0 && compareGeo(lat, maxLat) <= 0
	return lonInside && latInside
}
// Multiplicative factors for converting between degrees and radians.
const (
	degreesToRadian  = math.Pi / 180
	radiansToDegrees = 180 / math.Pi
)

// DegreesToRadians converts an angle in degrees to radians
func DegreesToRadians(d float64) float64 {
	return degreesToRadian * d
}

// RadiansToDegrees converts an angle in radians to degrees
func RadiansToDegrees(r float64) float64 {
	return radiansToDegrees * r
}
// earthMeanRadiusMeters is the radius used to turn a surface distance into an
// angular distance.
var earthMeanRadiusMeters = 6371008.7714

// RectFromPointDistance computes a bounding rectangle around the point
// (lon, lat), given in degrees, that extends dist in every direction. dist is
// divided by earthMeanRadiusMeters, so it is presumably expressed in meters.
//
// The four results are, in order: minimum longitude, maximum latitude, maximum
// longitude and minimum latitude (i.e. the top-left corner followed by the
// bottom-right corner), all in degrees. When the longitude span crosses
// +/-180 degrees the longitudes are wrapped, so the returned minimum longitude
// can be greater than the maximum longitude. When the rectangle reaches a
// pole, the latitudes are clamped and the full longitude range is returned.
//
// An error is returned when lon or lat is outside the valid range.
func RectFromPointDistance(lon, lat, dist float64) (float64, float64, float64, float64, error) {
	if err := checkLongitude(lon); err != nil {
		return 0, 0, 0, 0, err
	}
	if err := checkLatitude(lat); err != nil {
		return 0, 0, 0, 0, err
	}

	centerLon, centerLat := DegreesToRadians(lon), DegreesToRadians(lat)
	// A small constant is added to the distance before it is converted to an
	// angle.
	angular := (dist + 7e-2) / earthMeanRadiusMeters

	south := centerLat - angular
	north := centerLat + angular
	// Default to the full longitude range; narrowed below unless a pole is
	// inside the distance.
	west, east := minLonRad, maxLonRad

	poleInside := !(south > minLatRad && north < maxLatRad)
	if poleInside {
		south = math.Max(south, minLatRad)
		north = math.Min(north, maxLatRad)
	} else {
		deltaLon := math.Asin(math.Sin(angular) / math.Cos(centerLat))
		west = centerLon - deltaLon
		if west < minLonRad {
			west += 2 * math.Pi
		}
		east = centerLon + deltaLon
		if east > maxLonRad {
			east -= 2 * math.Pi
		}
	}

	return RadiansToDegrees(west),
		RadiansToDegrees(north),
		RadiansToDegrees(east),
		RadiansToDegrees(south),
		nil
}
// checkLatitude returns an error when latitude is NaN or lies outside the
// inclusive range [minLat, maxLat]; otherwise it returns nil.
func checkLatitude(latitude float64) error {
	switch {
	case math.IsNaN(latitude), latitude < minLat, latitude > maxLat:
		return fmt.Errorf("invalid latitude %f; must be between %f and %f", latitude, minLat, maxLat)
	}
	return nil
}
// checkLongitude returns an error when longitude is NaN or lies outside the
// inclusive range [minLon, maxLon]; otherwise it returns nil.
func checkLongitude(longitude float64) error {
	switch {
	case math.IsNaN(longitude), longitude < minLon, longitude > maxLon:
		return fmt.Errorf("invalid longitude %f; must be between %f and %f", longitude, minLon, maxLon)
	}
	return nil
}
// BoundingRectangleForPolygon computes the axis-aligned bounding rectangle of
// the given polygon vertices.
//
// The four results are, in order: minimum longitude, maximum latitude, maximum
// longitude and minimum latitude (i.e. the top-left corner followed by the
// bottom-right corner).
//
// An error is returned when the polygon has no points, or when any vertex has
// a longitude or latitude outside the valid range.
func BoundingRectangleForPolygon(polygon []Point) (
	float64, float64, float64, float64, error) {
	// Guard against indexing into an empty slice.
	if len(polygon) == 0 {
		return 0, 0, 0, 0, fmt.Errorf("polygon must contain at least one point")
	}

	maxY, minY := polygon[0].Lat, polygon[0].Lat
	maxX, minX := polygon[0].Lon, polygon[0].Lon
	// Start at index 0 so the first vertex is validated by the same code as
	// the rest; folding it into the running min/max is a no-op.
	for i := range polygon {
		if err := checkLongitude(polygon[i].Lon); err != nil {
			return 0, 0, 0, 0, err
		}
		if err := checkLatitude(polygon[i].Lat); err != nil {
			return 0, 0, 0, 0, err
		}

		maxY = math.Max(maxY, polygon[i].Lat)
		minY = math.Min(minY, polygon[i].Lat)
		maxX = math.Max(maxX, polygon[i].Lon)
		minX = math.Min(minX, polygon[i].Lon)
	}

	return minX, maxY, maxX, minY, nil
}
================================================
FILE: geo/geo_dist.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"strconv"
"strings"
)
// distanceUnit describes one supported unit of distance.
type distanceUnit struct {
	// conv is the multiplier that converts a value in this unit to meters.
	conv float64
	// suffixes lists the spellings accepted for this unit.
	suffixes []string
}

// Supported distance units and their conversion factors to meters.
var (
	inch      = distanceUnit{0.0254, []string{"in", "inch"}}
	yard      = distanceUnit{0.9144, []string{"yd", "yards"}}
	feet      = distanceUnit{0.3048, []string{"ft", "feet"}}
	kilom     = distanceUnit{1000, []string{"km", "kilometers"}}
	nauticalm = distanceUnit{1852.0, []string{"nm", "nauticalmiles"}}
	millim    = distanceUnit{0.001, []string{"mm", "millimeters"}}
	centim    = distanceUnit{0.01, []string{"cm", "centimeters"}}
	miles     = distanceUnit{1609.344, []string{"mi", "miles"}}
	meters    = distanceUnit{1, []string{"m", "meters"}}
)

// distanceUnits is the lookup table scanned when parsing distances. Order
// matters for suffix matching: meters comes last because its suffixes ("m",
// "meters") are also the tail of other units' suffixes such as "km", "nm",
// "mm", "cm" and "kilometers".
var distanceUnits = []*distanceUnit{
	&inch, &yard, &feet, &kilom, &nauticalm, &millim, &centim, &miles, &meters,
}
// ParseDistance attempts to parse a distance string and return distance in
// meters. Example formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters"
//
// Units are tried in the order of distanceUnits and the first suffix that
// matches the end of the string wins; the remaining prefix is parsed as the
// number. If the unit cannot be determined, the entire string is parsed and
// the unit of meters is assumed.
// If the number portion cannot be parsed, 0 and the parse error are returned.
func ParseDistance(d string) (float64, error) {
	for _, candidate := range distanceUnits {
		for _, suffix := range candidate.suffixes {
			if !strings.HasSuffix(d, suffix) {
				continue
			}
			number, err := strconv.ParseFloat(strings.TrimSuffix(d, suffix), 64)
			if err != nil {
				return 0, err
			}
			return number * candidate.conv, nil
		}
	}

	// no unit matched, try assuming meters?
	number, err := strconv.ParseFloat(d, 64)
	if err != nil {
		return 0, err
	}
	return number, nil
}
// ParseDistanceUnit attempts to parse a distance unit and return the
// multiplier for converting this to meters. The unit must match one of the
// known suffixes exactly. If the unit cannot be parsed then 0 and an error
// are returned.
func ParseDistanceUnit(u string) (float64, error) {
	for _, candidate := range distanceUnits {
		for _, suffix := range candidate.suffixes {
			if suffix != u {
				continue
			}
			return candidate.conv, nil
		}
	}
	return 0, fmt.Errorf("unknown distance unit: %s", u)
}
// Haversin computes the distance between two points given as lon/lat pairs in
// degrees.
// This implementation uses the sloppy math implementations which trade off
// accuracy for performance. The distance returned is in kilometers.
//
// The diameter used is obtained from earthDiameter at the average of the two
// latitudes (in radians).
func Haversin(lon1, lat1, lon2, lat2 float64) float64 {
	x1 := lat1 * degreesToRadian
	x2 := lat2 * degreesToRadian

	latTerm := 1 - math.Cos(x1-x2)
	lonTerm := 1 - math.Cos((lon1-lon2)*degreesToRadian)
	h := (latTerm + math.Cos(x1)*math.Cos(x2)*lonTerm) / 2

	// Clamp to 1 so rounding error cannot push the Asin argument out of range.
	halfAngle := math.Asin(math.Min(1, math.Sqrt(h)))
	return earthDiameter((x1+x2)/2) * halfAngle
}
================================================
FILE: geo/geo_dist_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"fmt"
"math"
"reflect"
"strconv"
"testing"
)
// TestParseDistance checks ParseDistance against inputs with and without a
// unit suffix, including inputs whose numeric part is empty.
func TestParseDistance(t *testing.T) {
	type testCase struct {
		dist    string
		want    float64
		wantErr error
	}
	cases := []testCase{
		{"5mi", 5 * 1609.344, nil},
		{"3", 3, nil},
		{"3m", 3, nil},
		{"5km", 5000, nil},
		{"km", 0, &strconv.NumError{Func: "ParseFloat", Num: "", Err: strconv.ErrSyntax}},
		{"", 0, &strconv.NumError{Func: "ParseFloat", Num: "", Err: strconv.ErrSyntax}},
	}

	for _, tc := range cases {
		got, err := ParseDistance(tc.dist)
		if !reflect.DeepEqual(err, tc.wantErr) {
			t.Errorf("expected err: %v, got %v for %s", tc.wantErr, err, tc.dist)
		}
		if got != tc.want {
			t.Errorf("expected distance %f got %f for %s", tc.want, got, tc.dist)
		}
	}
}
// TestParseDistanceUnit checks ParseDistanceUnit against known unit suffixes
// as well as empty and unknown units.
func TestParseDistanceUnit(t *testing.T) {
	type testCase struct {
		dist    string
		want    float64
		wantErr error
	}
	cases := []testCase{
		{"mi", 1609.344, nil},
		{"m", 1, nil},
		{"km", 1000, nil},
		{"", 0, fmt.Errorf("unknown distance unit: ")},
		{"kam", 0, fmt.Errorf("unknown distance unit: kam")},
	}

	for _, tc := range cases {
		got, err := ParseDistanceUnit(tc.dist)
		if !reflect.DeepEqual(err, tc.wantErr) {
			t.Errorf("expected err: %v, got %v for %s", tc.wantErr, err, tc.dist)
		}
		if got != tc.want {
			t.Errorf("expected distance %f got %f for %s", tc.want, got, tc.dist)
		}
	}
}
// TestHaversinDistance checks Haversin against NaN inputs, coincident points,
// antipodal points on the equator and a set of reference distances. Results
// are compared with an absolute tolerance of 1e-2.
func TestHaversinDistance(t *testing.T) {
	earthRadiusKMs := 6378.137
	halfCircle := earthRadiusKMs * math.Pi

	type testCase struct {
		lon1 float64
		lat1 float64
		lon2 float64
		lat2 float64
		want float64
	}
	cases := []testCase{
		{1, 1, math.NaN(), 1, math.NaN()},
		{1, 1, 1, math.NaN(), math.NaN()},
		{1, math.NaN(), 1, 1, math.NaN()},
		{math.NaN(), 1, 1, 1, math.NaN()},
		{0, 0, 0, 0, 0},
		{-180, 0, -180, 0, 0},
		{-180, 0, 180, 0, 0},
		{180, 0, 180, 0, 0},
		{0, 90, 0, 90, 0},
		{-180, 90, -180, 90, 0},
		{-180, 90, 180, 90, 0},
		{180, 90, 180, 90, 0},
		{0, 0, 180, 0, halfCircle},
		{-74.0059731, 40.7143528, -74.0059731, 40.7143528, 0},
		{-74.0059731, 40.7143528, -73.9844722, 40.759011, 5.286},
		{-74.0059731, 40.7143528, -74.007819, 40.718266, 0.4621},
		{-74.0059731, 40.7143528, -74.0088305, 40.7051157, 1.055},
		{-74.0059731, 40.7143528, -74, 40.7247222, 1.258},
		{-74.0059731, 40.7143528, -73.9962255, 40.731033, 2.029},
		{-74.0059731, 40.7143528, -73.95, 40.65, 8.572},
	}

	for _, tc := range cases {
		got := Haversin(tc.lon1, tc.lat1, tc.lon2, tc.lat2)
		if math.IsNaN(tc.want) {
			if !math.IsNaN(got) {
				t.Errorf("expected NaN, got %f", got)
			}
			continue
		}
		if math.Abs(got-tc.want) > 1e-2 {
			t.Errorf("expected %f got %f", tc.want, got)
		}
	}
}
================================================
FILE: geo/geo_s2plugin_impl.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"encoding/json"
"sync"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/geojson"
"github.com/blevesearch/geo/s2"
)
// Lowercase type names for the geo shapes referenced in this package.
// PointType is the value returned by (*Point).Type.
const (
PointType = "point"
MultiPointType = "multipoint"
LineStringType = "linestring"
MultiLineStringType = "multilinestring"
PolygonType = "polygon"
MultiPolygonType = "multipolygon"
GeometryCollectionType = "geometrycollection"
CircleType = "circle"
EnvelopeType = "envelope"
)
// spatialPluginsMap caches the registered spatial analyzer plugins, keyed by
// the value of their Type method. pluginsMapLock guards every access to it.
var (
	pluginsMapLock    sync.RWMutex
	spatialPluginsMap = make(map[string]index.SpatialAnalyzerPlugin)
)
// init registers the built-in s2 spatial analyzer plugin when the package is
// loaded.
func init() {
registerS2RegionTermIndexer()
}
// registerS2RegionTermIndexer builds an S2SpatialAnalyzerPlugin with three
// region term indexers (one for index-time tokens, one for query-time tokens
// and one for geo points) and registers it in the plugin cache.
func registerS2RegionTermIndexer() {
	plugin := &S2SpatialAnalyzerPlugin{}
	plugin.s2Indexer = s2.NewRegionTermIndexerWithOptions(initS2IndexerOptions())
	plugin.s2Searcher = s2.NewRegionTermIndexerWithOptions(initS2SearcherOptions())
	plugin.s2GeoPointsRegionTermIndexer = s2.NewRegionTermIndexerWithOptions(initS2OptionsForGeoPoints())
	RegisterSpatialAnalyzerPlugin(plugin)
}
// RegisterSpatialAnalyzerPlugin registers the given plugin implementation
// under the name returned by its Type method, replacing any plugin previously
// registered under that name.
func RegisterSpatialAnalyzerPlugin(plugin index.SpatialAnalyzerPlugin) {
	pluginsMapLock.Lock()
	// Release via defer so that a panic inside plugin.Type() cannot leave the
	// lock held and block every later registration or lookup.
	defer pluginsMapLock.Unlock()
	spatialPluginsMap[plugin.Type()] = plugin
}
// GetSpatialAnalyzerPlugin retrieves the plugin registered under the given
// type name. It returns nil when no plugin is registered under that name.
func GetSpatialAnalyzerPlugin(typ string) index.SpatialAnalyzerPlugin {
	pluginsMapLock.RLock()
	defer pluginsMapLock.RUnlock()
	return spatialPluginsMap[typ]
}
// initS2IndexerOptions returns the options for s2's region
// term indexer for the index time tokens of geojson shapes.
func initS2IndexerOptions() s2.Options {
	var opts s2.Options

	// Cell levels used to approximate regions are limited to [2, 16].
	opts.SetMaxLevel(16)
	opts.SetMinLevel(2)

	// A levelMod greater than 1 would increase the effective branching
	// factor of the S2Cell hierarchy by skipping some levels.
	opts.SetLevelMod(1)

	// At most 20 cells are used when approximating each s2 region.
	opts.SetMaxCells(20)

	return opts
}
// initS2SearcherOptions returns the options for s2's region
// term indexer for the query time tokens of geojson shapes.
func initS2SearcherOptions() s2.Options {
	var opts s2.Options

	// Cell levels used to approximate regions are limited to [2, 16].
	opts.SetMaxLevel(16)
	opts.SetMinLevel(2)

	// A levelMod greater than 1 would increase the effective branching
	// factor of the S2Cell hierarchy by skipping some levels.
	opts.SetLevelMod(1)

	// At most 8 cells are used when approximating each s2 region.
	opts.SetMaxCells(8)

	return opts
}
// initS2OptionsForGeoPoints returns the options for
// s2's region term indexer for the original geopoints.
func initS2OptionsForGeoPoints() s2.Options {
	var opts s2.Options

	// Cell levels used to approximate regions are limited to [4, 16].
	opts.SetMaxLevel(16)
	opts.SetMinLevel(4)

	// A levelMod greater than 1 increases the effective branching
	// factor of the S2Cell hierarchy by skipping some levels.
	opts.SetLevelMod(2)

	// At most 8 cells are used when approximating each s2 region.
	opts.SetMaxCells(8)

	// explicit for geo points.
	opts.SetPointsOnly(true)

	return opts
}
// S2SpatialAnalyzerPlugin is an implementation of
// the index.SpatialAnalyzerPlugin interface.
type S2SpatialAnalyzerPlugin struct {
// s2Indexer is handed to s2Tokenizable shapes by GetIndexTokens.
s2Indexer *s2.RegionTermIndexer
// s2Searcher is handed to s2Tokenizable shapes by GetQueryTokens.
s2Searcher *s2.RegionTermIndexer
// s2GeoPointsRegionTermIndexer is used by the s2TokenizableEx shapes in
// this package (Point, boundedRectangle, boundedPolygon, pointDistance).
s2GeoPointsRegionTermIndexer *s2.RegionTermIndexer
}
// Type returns the name under which this plugin is registered: "s2".
func (s *S2SpatialAnalyzerPlugin) Type() string {
return "s2"
}
// GetIndexTokens returns the deduplicated index-time tokens for the given
// shape. A *geojson.GeometryCollection is expanded into its member shapes.
// Shapes implementing s2Tokenizable are tokenized with s.s2Indexer, shapes
// implementing s2TokenizableEx receive the plugin itself, and any other shape
// contributes no tokens.
func (s *S2SpatialAnalyzerPlugin) GetIndexTokens(queryShape index.GeoJSON) []string {
	var shapes []index.GeoJSON
	if gc, isCollection := queryShape.(*geojson.GeometryCollection); isCollection {
		shapes = gc.Shapes
	} else {
		shapes = []index.GeoJSON{queryShape}
	}

	var tokens []string
	for _, shape := range shapes {
		switch tokenizer := shape.(type) {
		case s2Tokenizable:
			tokens = append(tokens, tokenizer.IndexTokens(s.s2Indexer)...)
		case s2TokenizableEx:
			tokens = append(tokens, tokenizer.IndexTokens(s)...)
		}
	}

	return geojson.DeduplicateTerms(tokens)
}
// GetQueryTokens returns the deduplicated query-time tokens for the given
// shape. A *geojson.GeometryCollection is expanded into its member shapes.
// Shapes implementing s2Tokenizable are tokenized with s.s2Searcher, shapes
// implementing s2TokenizableEx receive the plugin itself, and any other shape
// contributes no tokens.
func (s *S2SpatialAnalyzerPlugin) GetQueryTokens(queryShape index.GeoJSON) []string {
	var shapes []index.GeoJSON
	if gc, isCollection := queryShape.(*geojson.GeometryCollection); isCollection {
		shapes = gc.Shapes
	} else {
		shapes = []index.GeoJSON{queryShape}
	}

	var tokens []string
	for _, shape := range shapes {
		switch tokenizer := shape.(type) {
		case s2Tokenizable:
			tokens = append(tokens, tokenizer.QueryTokens(s.s2Searcher)...)
		case s2TokenizableEx:
			tokens = append(tokens, tokenizer.QueryTokens(s)...)
		}
	}

	return geojson.DeduplicateTerms(tokens)
}
// ------------------------------------------------------------------------
// s2Tokenizable is an optional interface for shapes that support
// the generation of s2 based tokens that can be used for both
// indexing and querying. Implementations receive the region term indexer
// chosen by the plugin (s2Indexer for indexing, s2Searcher for querying).
type s2Tokenizable interface {
// IndexTokens returns the tokens for indexing.
IndexTokens(*s2.RegionTermIndexer) []string
// QueryTokens returns the tokens for searching.
QueryTokens(*s2.RegionTermIndexer) []string
}
// ------------------------------------------------------------------------
// s2TokenizableEx is an optional interface for shapes that support
// the generation of s2 based tokens that can be used for both
// indexing and querying. This is intended for the older geopoint
// indexing and querying. Implementations receive the whole plugin rather
// than a single region term indexer.
type s2TokenizableEx interface {
// IndexTokens returns the tokens for indexing.
IndexTokens(*S2SpatialAnalyzerPlugin) []string
// QueryTokens returns the tokens for searching.
QueryTokens(*S2SpatialAnalyzerPlugin) []string
}
//----------------------------------------------------------------------------------
// Type returns the shape type name of a Point, PointType.
func (p *Point) Type() string {
return PointType
}
// Value returns the point serialized with util.MarshalJSON.
func (p *Point) Value() ([]byte, error) {
return util.MarshalJSON(p)
}
// Intersects is a placeholder: it ignores s and always returns false with a
// nil error.
func (p *Point) Intersects(s index.GeoJSON) (bool, error) {
// placeholder implementation
return false, nil
}
// Contains is a placeholder: it ignores s and always returns false with a
// nil error.
func (p *Point) Contains(s index.GeoJSON) (bool, error) {
// placeholder implementation
return false, nil
}
// IndexTokens returns the index-time terms for the point, produced by the
// plugin's geo point region term indexer with an empty prefix.
func (p *Point) IndexTokens(s *S2SpatialAnalyzerPlugin) []string {
	latLng := s2.LatLngFromDegrees(p.Lat, p.Lon)
	s2Point := s2.PointFromLatLng(latLng)
	return s.s2GeoPointsRegionTermIndexer.GetIndexTermsForPoint(s2Point, "")
}
// QueryTokens always returns nil: a Point produces no query-time tokens.
func (p *Point) QueryTokens(s *S2SpatialAnalyzerPlugin) []string {
return nil
}
//----------------------------------------------------------------------------------
// boundedRectangle is a query-only shape describing a lat/lon bounding box.
// It produces query tokens but no index tokens.
type boundedRectangle struct {
	minLat float64
	maxLat float64
	minLon float64
	maxLon float64
}

// NewBoundedRectangle creates a boundedRectangle from its minimum and maximum
// corner. Note the argument order: minLat, minLon, maxLat, maxLon.
func NewBoundedRectangle(minLat, minLon, maxLat,
	maxLon float64) *boundedRectangle {
	br := new(boundedRectangle)
	br.minLat, br.minLon = minLat, minLon
	br.maxLat, br.maxLon = maxLat, maxLon
	return br
}

// Type returns the placeholder type name "boundedRectangle".
func (br *boundedRectangle) Type() string {
	// placeholder implementation
	return "boundedRectangle"
}

// Value returns the rectangle serialized with util.MarshalJSON.
func (br *boundedRectangle) Value() ([]byte, error) {
	return util.MarshalJSON(br)
}

// Intersects is a placeholder: it always returns false with a nil error.
func (br *boundedRectangle) Intersects(s index.GeoJSON) (bool, error) {
	// placeholder implementation
	return false, nil
}

// Contains is a placeholder: it always returns false with a nil error.
func (br *boundedRectangle) Contains(s index.GeoJSON) (bool, error) {
	// placeholder implementation
	return false, nil
}

// IndexTokens always returns nil: the rectangle is never indexed.
func (br *boundedRectangle) IndexTokens(s *S2SpatialAnalyzerPlugin) []string {
	return nil
}

// QueryTokens returns the terms to search for the bounding box: the query
// terms of the corresponding s2 rectangle from the plugin's geo point
// indexer, passed through geojson.StripCoveringTerms.
func (br *boundedRectangle) QueryTokens(s *S2SpatialAnalyzerPlugin) []string {
	region := s2.RectFromDegrees(br.minLat, br.minLon, br.maxLat, br.maxLon)
	indexer := s.s2GeoPointsRegionTermIndexer
	return geojson.StripCoveringTerms(indexer.GetQueryTermsForRegion(region, ""))
}
//----------------------------------------------------------------------------------
// boundedPolygon is a query-only shape described by a list of vertices.
// It produces query tokens but no index tokens.
type boundedPolygon struct {
	coordinates []Point
}

// NewBoundedPolygon creates a boundedPolygon over the given vertices. The
// slice is stored as-is, not copied.
func NewBoundedPolygon(coordinates []Point) *boundedPolygon {
	bp := new(boundedPolygon)
	bp.coordinates = coordinates
	return bp
}

// Type returns the placeholder type name "boundedPolygon".
func (bp *boundedPolygon) Type() string {
	// placeholder implementation
	return "boundedPolygon"
}

// Value returns the polygon serialized with util.MarshalJSON.
func (bp *boundedPolygon) Value() ([]byte, error) {
	return util.MarshalJSON(bp)
}

// Intersects is a placeholder: it always returns false with a nil error.
func (bp *boundedPolygon) Intersects(s index.GeoJSON) (bool, error) {
	// placeholder implementation
	return false, nil
}

// Contains is a placeholder: it always returns false with a nil error.
func (bp *boundedPolygon) Contains(s index.GeoJSON) (bool, error) {
	// placeholder implementation
	return false, nil
}

// IndexTokens always returns nil: the polygon is never indexed.
func (bp *boundedPolygon) IndexTokens(s *S2SpatialAnalyzerPlugin) []string {
	return nil
}

// QueryTokens returns the terms to search for the polygon. The vertices are
// turned into a single-loop s2 polygon, and the query terms are computed for
// that polygon's cap bound (not the polygon itself) using the plugin's geo
// point indexer, then passed through geojson.StripCoveringTerms.
func (bp *boundedPolygon) QueryTokens(s *S2SpatialAnalyzerPlugin) []string {
	vertices := make([]s2.Point, 0, len(bp.coordinates))
	for _, coord := range bp.coordinates {
		latLng := s2.LatLngFromDegrees(coord.Lat, coord.Lon)
		vertices = append(vertices, s2.PointFromLatLng(latLng))
	}
	loop := s2.LoopFromPoints(vertices)
	polygon := s2.PolygonFromOrientedLoops([]*s2.Loop{loop})

	indexer := s.s2GeoPointsRegionTermIndexer
	return geojson.StripCoveringTerms(
		indexer.GetQueryTermsForRegion(polygon.CapBound(), ""))
}
//----------------------------------------------------------------------------------
// pointDistance is a query-only shape described by a center point and a
// distance around it. It produces query tokens but no index tokens.
type pointDistance struct {
	dist      float64
	centerLat float64
	centerLon float64
}

// Type returns the placeholder type name "pointDistance".
func (pd *pointDistance) Type() string {
	// placeholder implementation
	return "pointDistance"
}

// Value returns the shape serialized with util.MarshalJSON.
func (pd *pointDistance) Value() ([]byte, error) {
	return util.MarshalJSON(pd)
}

// NewPointDistance creates a pointDistance from a center and a distance.
// Note the argument order: centerLat, centerLon, dist.
func NewPointDistance(centerLat, centerLon,
	dist float64) *pointDistance {
	pd := new(pointDistance)
	pd.centerLat, pd.centerLon = centerLat, centerLon
	pd.dist = dist
	return pd
}

// Intersects is a placeholder: it always returns false with a nil error.
func (pd *pointDistance) Intersects(s index.GeoJSON) (bool, error) {
	// placeholder implementation
	return false, nil
}

// Contains is a placeholder: it always returns false with a nil error.
func (pd *pointDistance) Contains(s index.GeoJSON) (bool, error) {
	// placeholder implementation
	return false, nil
}

// IndexTokens always returns nil: the shape is never indexed.
func (pd *pointDistance) IndexTokens(s *S2SpatialAnalyzerPlugin) []string {
	return nil
}

// QueryTokens returns the terms to search for the distance query: the query
// terms of the s2 cap built from the center and distance, computed with the
// plugin's geo point indexer and passed through geojson.StripCoveringTerms.
func (pd *pointDistance) QueryTokens(s *S2SpatialAnalyzerPlugin) []string {
	region := s2.CapFromCenterAndRadius(pd.centerLat, pd.centerLon, pd.dist)
	indexer := s.s2GeoPointsRegionTermIndexer
	return geojson.StripCoveringTerms(indexer.GetQueryTermsForRegion(region, ""))
}
// ------------------------------------------------------------------------
// NewGeometryCollection instantiates a geometrycollection
// and prefixes the byte contents with certain glue bytes that
// can be used later while filtering the doc values.
//
// coordinates[i] and typs[i] together describe the i-th member shape, so typs
// must have at least as many entries as coordinates. The shapes are handed to
// geojson.NewGeometryCollection, whose results are returned unchanged.
func NewGeometryCollection(coordinates [][][][][]float64,
	typs []string) (index.GeoJSON, []byte, error) {
	shapes := make([]*geojson.GeoShape, 0, len(coordinates))
	for i, coords := range coordinates {
		shapes = append(shapes, &geojson.GeoShape{
			Type:        typs[i],
			Coordinates: coords,
		})
	}
	return geojson.NewGeometryCollection(shapes)
}
// NewGeometryCollectionFromShapes hands the given shapes to
// geojson.NewGeometryCollection and returns its three results unchanged.
func NewGeometryCollectionFromShapes(shapes []*geojson.GeoShape) (
index.GeoJSON, []byte, error) {
return geojson.NewGeometryCollection(shapes)
}
// NewGeoCircleShape instantiates a circle shape and
// prefixes the byte contents with certain glue bytes that
// can be used later while filtering the doc values.
// cp and radius are passed to geojson.NewGeoCircleShape unchanged.
func NewGeoCircleShape(cp []float64,
radius string) (index.GeoJSON, []byte, error) {
return geojson.NewGeoCircleShape(cp, radius)
}
// NewGeoJsonShape passes coordinates and typ to geojson.NewGeoJsonShape and
// returns its three results unchanged.
func NewGeoJsonShape(coordinates [][][][]float64, typ string) (
index.GeoJSON, []byte, error) {
return geojson.NewGeoJsonShape(coordinates, typ)
}
// NewGeoJsonPoint returns the result of geojson.NewGeoJsonPoint for points.
func NewGeoJsonPoint(points []float64) index.GeoJSON {
return geojson.NewGeoJsonPoint(points)
}
// NewGeoJsonMultiPoint returns the result of geojson.NewGeoJsonMultiPoint
// for points.
func NewGeoJsonMultiPoint(points [][]float64) index.GeoJSON {
return geojson.NewGeoJsonMultiPoint(points)
}
// NewGeoJsonLinestring returns the result of geojson.NewGeoJsonLinestring
// for points.
func NewGeoJsonLinestring(points [][]float64) index.GeoJSON {
return geojson.NewGeoJsonLinestring(points)
}
// NewGeoJsonMultilinestring returns the result of
// geojson.NewGeoJsonMultilinestring for points.
func NewGeoJsonMultilinestring(points [][][]float64) index.GeoJSON {
return geojson.NewGeoJsonMultilinestring(points)
}
// NewGeoJsonPolygon returns the result of geojson.NewGeoJsonPolygon for
// points.
func NewGeoJsonPolygon(points [][][]float64) index.GeoJSON {
return geojson.NewGeoJsonPolygon(points)
}
// NewGeoJsonMultiPolygon returns the result of
// geojson.NewGeoJsonMultiPolygon for points.
func NewGeoJsonMultiPolygon(points [][][][]float64) index.GeoJSON {
return geojson.NewGeoJsonMultiPolygon(points)
}
// NewGeoCircle returns the result of geojson.NewGeoCircle for the given
// points and radius string.
func NewGeoCircle(points []float64, radius string) index.GeoJSON {
return geojson.NewGeoCircle(points, radius)
}
// NewGeoEnvelope returns the result of geojson.NewGeoEnvelope for points.
func NewGeoEnvelope(points [][]float64) index.GeoJSON {
return geojson.NewGeoEnvelope(points)
}
// ParseGeoJSONShape hands the raw JSON message to geojson.ParseGeoJSONShape
// and returns its shape and error unchanged.
func ParseGeoJSONShape(input json.RawMessage) (index.GeoJSON, error) {
return geojson.ParseGeoJSONShape(input)
}
================================================
FILE: geo/geo_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"math"
"testing"
)
// TestMortonHashMortonUnhash hashes lon/lat pairs with MortonHash and checks
// that MortonUnhashLon / MortonUnhashLat give back the inputs, as judged by
// compareGeo.
func TestMortonHashMortonUnhash(t *testing.T) {
	type coord struct {
		lon float64
		lat float64
	}
	cases := []coord{
		{-180.0, -90.0},
		{-5, 27.3},
		{0, 0},
		{1.0, 1.0},
		{24.7, -80.4},
		{180.0, 90.0},
	}

	for _, tc := range cases {
		hash := MortonHash(tc.lon, tc.lat)
		gotLon := MortonUnhashLon(hash)
		gotLat := MortonUnhashLat(hash)

		if compareGeo(tc.lon, gotLon) != 0 {
			t.Errorf("expected lon %f, got %f, hash %x", tc.lon, gotLon, hash)
		}
		if compareGeo(tc.lat, gotLat) != 0 {
			t.Errorf("expected lat %f, got %f, hash %x", tc.lat, gotLat, hash)
		}
	}
}
// TestScaleLonUnscaleLon checks that unscaleLon undoes scaleLon for a few
// longitudes, as judged by compareGeo.
func TestScaleLonUnscaleLon(t *testing.T) {
	longitudes := []float64{-180.0, 0.0, 1.0, 180.0}

	for _, want := range longitudes {
		scaled := scaleLon(want)
		got := unscaleLon(scaled)
		if compareGeo(want, got) != 0 {
			t.Errorf("expected %f, got %f, scaled was %d", want, got, scaled)
		}
	}
}
// TestScaleLatUnscaleLat checks that unscaleLat undoes scaleLat for a few
// latitudes, as judged by compareGeo.
func TestScaleLatUnscaleLat(t *testing.T) {
	latitudes := []float64{-90.0, 0.0, 1.0, 90.0}

	for _, want := range latitudes {
		scaled := scaleLat(want)
		got := unscaleLat(scaled)
		if compareGeo(want, got) != 0 {
			t.Errorf("expected %.16f, got %.16f, scaled was %d", want, got, scaled)
		}
	}
}
// TestRectFromPointDistance checks that a box of 110567 meters around the
// origin spans roughly one degree of latitude in each direction.
func TestRectFromPointDistance(t *testing.T) {
	const tolerance = 1e-2

	// at the equator 1 degree of latitude is about 110567 meters
	_, topLat, _, bottomLat, err := RectFromPointDistance(0, 0, 110567)
	if err != nil {
		t.Fatal(err)
	}

	if offBy := math.Abs(topLat - 1); offBy > tolerance {
		t.Errorf("expected bounding box upper left lat to be almost 1, got %f", topLat)
	}
	if offBy := math.Abs(bottomLat + 1); offBy > tolerance {
		t.Errorf("expected bounding box lower right lat to be almost -1, got %f", bottomLat)
	}
}
// TestRectIntersects runs RectIntersects over pairs of rectangles a and b,
// each given as min x/y and max x/y, and compares with the expected answer.
func TestRectIntersects(t *testing.T) {
	cases := []struct {
		aMinX, aMinY, aMaxX, aMaxY float64
		bMinX, bMinY, bMaxX, bMaxY float64
		want                       bool
	}{
		// clearly overlap
		{0, 0, 2, 2, 1, 1, 3, 3, true},
		// clearly do not overlap
		{0, 0, 1, 1, 2, 2, 3, 3, false},
		// share common point
		{0, 0, 1, 1, 1, 1, 2, 2, true},
	}

	for _, tc := range cases {
		got := RectIntersects(tc.aMinX, tc.aMinY, tc.aMaxX, tc.aMaxY,
			tc.bMinX, tc.bMinY, tc.bMaxX, tc.bMaxY)
		if got == tc.want {
			continue
		}
		t.Errorf("expected intersects %t, got %t for %f %f %f %f %f %f %f %f", tc.want, got, tc.aMinX, tc.aMinY, tc.aMaxX, tc.aMaxY, tc.bMinX, tc.bMinY, tc.bMaxX, tc.bMaxY)
	}
}
// TestRectWithin runs RectWithin over pairs of rectangles a and b, each given
// as min x/y and max x/y, and compares with the expected answer.
func TestRectWithin(t *testing.T) {
	cases := []struct {
		aMinX, aMinY, aMaxX, aMaxY float64
		bMinX, bMinY, bMaxX, bMaxY float64
		want                       bool
	}{
		// clearly within
		{1, 1, 2, 2, 0, 0, 3, 3, true},
		// clearly not within
		{0, 0, 1, 1, 2, 2, 3, 3, false},
		// overlapping
		{0, 0, 2, 2, 1, 1, 3, 3, false},
		// share common point
		{0, 0, 1, 1, 1, 1, 2, 2, false},
		// within, but boxes reversed (b is within a, but not a within b)
		{0, 0, 3, 3, 1, 1, 2, 2, false},
	}

	for _, tc := range cases {
		got := RectWithin(tc.aMinX, tc.aMinY, tc.aMaxX, tc.aMaxY,
			tc.bMinX, tc.bMinY, tc.bMaxX, tc.bMaxY)
		if got == tc.want {
			continue
		}
		t.Errorf("expected within %t, got %t for %f %f %f %f %f %f %f %f", tc.want, got, tc.aMinX, tc.aMinY, tc.aMaxX, tc.aMaxY, tc.bMinX, tc.bMinY, tc.bMaxX, tc.bMaxY)
	}
}
// TestBoundingBoxContains runs BoundingBoxContains for a point against a box
// given as min x/y and max x/y, and compares with the expected answer.
func TestBoundingBoxContains(t *testing.T) {
	cases := []struct {
		lon, lat               float64
		minX, minY, maxX, maxY float64
		want                   bool
	}{
		// clearly contains
		{1, 1, 0, 0, 2, 2, true},
		// clearly does not contain
		{0, 0, 1, 1, 2, 2, false},
		// on corner
		{0, 0, 0, 0, 2, 2, true},
	}

	for _, tc := range cases {
		got := BoundingBoxContains(tc.lon, tc.lat, tc.minX, tc.minY, tc.maxX, tc.maxY)
		if got == tc.want {
			continue
		}
		t.Errorf("expected box contains %t, got %t for %f,%f in %f %f %f %f ", tc.want, got, tc.lon, tc.lat, tc.minX, tc.minY, tc.maxX, tc.maxY)
	}
}
================================================
FILE: geo/geohash.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// This implementation is inspired by geohash-js.
// ref: https://github.com/davetroy/geohash-js
package geo
// encoding encapsulates an encoding defined by a given base32 alphabet.
type encoding struct {
	// enc is the alphabet; the byte at position i is the symbol for value i.
	enc string
	// dec maps a symbol byte back to its value; bytes that are not part of
	// the alphabet hold 0xff.
	dec [256]byte
}

// newEncoding constructs a new encoding defined by the given alphabet,
// which must be a 32-byte string.
func newEncoding(encoder string) *encoding {
	e := &encoding{enc: encoder}

	// start with every byte marked as "not in the alphabet".
	for i := range e.dec {
		e.dec[i] = 0xff
	}
	// then record the position of each alphabet byte.
	for pos, symbol := range []byte(encoder) {
		e.dec[symbol] = byte(pos)
	}
	return e
}
// base32encoding with the Geohash alphabet.
var base32encoding = newEncoding("0123456789bcdefghjkmnpqrstuvwxyz")
// masks holds the bit masks for the five bits of one base32 symbol, from
// the highest bit (16) down to the lowest (1).
var masks = []uint64{16, 8, 4, 2, 1}
// DecodeGeoHash decodes the string geohash faster with
// higher precision. This api is in experimental phase.
//
// It returns the latitude and longitude (in that order) of the center of
// the cell the hash describes. Each symbol contributes five bits that
// alternately halve the longitude and latitude ranges, longitude first.
// No validation is performed: a byte outside the geohash alphabet maps to
// 0xff in the decode table and is therefore treated as five set bits.
func DecodeGeoHash(geoHash string) (float64, float64) {
	latMin, latMax := -90.0, 90.0
	lonMin, lonMax := -180.0, 180.0
	lonTurn := true

	for i := 0; i < len(geoHash); i++ {
		symbol := uint64(base32encoding.dec[geoHash[i]])
		for j := 0; j < 5; j++ {
			bitSet := symbol&masks[j] > 0
			switch {
			case lonTurn && bitSet:
				lonMin = (lonMin + lonMax) / 2
			case lonTurn:
				lonMax = (lonMin + lonMax) / 2
			case bitSet:
				latMin = (latMin + latMax) / 2
			default:
				latMax = (latMin + latMax) / 2
			}
			lonTurn = !lonTurn
		}
	}

	return (latMin + latMax) / 2, (lonMin + lonMax) / 2
}
// EncodeGeoHash returns the 12-symbol geohash for the given latitude and
// longitude.
//
// The longitude and latitude ranges are halved alternately, longitude
// first; a value strictly greater than the midpoint sets the bit and keeps
// the upper half, anything else keeps the lower half. Every five bits are
// emitted as one symbol of the geohash alphabet.
func EncodeGeoHash(lat, lon float64) string {
	const precision = 12

	latMin, latMax := -90.0, 90.0
	lonMin, lonMax := -180.0, 180.0
	even := true

	var ch, bit uint64
	// collect the symbols in a pre-sized buffer instead of growing a string
	// by concatenation on every symbol.
	geoHash := make([]byte, 0, precision)

	for len(geoHash) < precision {
		if even {
			mid := (lonMin + lonMax) / 2
			if lon > mid {
				ch |= masks[bit]
				lonMin = mid
			} else {
				lonMax = mid
			}
		} else {
			mid := (latMin + latMax) / 2
			if lat > mid {
				ch |= masks[bit]
				latMin = mid
			} else {
				latMax = mid
			}
		}
		even = !even

		if bit < 4 {
			bit++
			continue
		}
		// five bits collected: emit the symbol and start the next one.
		geoHash = append(geoHash, base32encoding.enc[ch])
		ch, bit = 0, 0
	}

	return string(geoHash)
}
================================================
FILE: geo/geohash_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"strings"
"testing"
)
// TestDecodeGeoHash decodes a handful of geohashes and compares the
// resulting coordinates with reference values using compareGeo.
func TestDecodeGeoHash(t *testing.T) {
	cases := []struct {
		hash string
		lon  float64
		lat  float64
	}{
		{"d3hn3", -73.059082, 6.745605},     // -73.05908203, 6.74560547 as per http://geohash.co/
		{"u4pru", 10.393066, 57.634277},     // 10.39306641, 57.63427734
		{"u4pruy", 10.409546, 57.648010},    // 10.40954590, 57.64801025
		{"u4pruyd", 10.407486, 57.648697},   // 10.40748596, 57.64869690
		{"u4pruydqqvj", 10.40744, 57.64911}, // 10.40743969, 57.64911063
	}

	for _, tc := range cases {
		gotLat, gotLon := DecodeGeoHash(tc.hash)
		if compareGeo(tc.lon, gotLon) != 0 {
			t.Errorf("expected lon %f, got %f, hash %s", tc.lon, gotLon, tc.hash)
		}
		if compareGeo(tc.lat, gotLat) != 0 {
			t.Errorf("expected lat %f, got %f, hash %s", tc.lat, gotLat, tc.hash)
		}
	}
}
// TestEncodeGeoHash encodes known coordinates and checks that the produced
// hash starts with the expected reference prefix.
func TestEncodeGeoHash(t *testing.T) {
	cases := []struct {
		lon  float64
		lat  float64
		hash string
	}{
		{2.29449034, 48.85841131, "u09tunquc"},
		{76.491540, 10.060349, "t9y3hx7my0fp"},
	}

	for _, tc := range cases {
		got := EncodeGeoHash(tc.lat, tc.lon)
		if strings.HasPrefix(got, tc.hash) {
			continue
		}
		t.Errorf("expected hash %s, got %s", tc.hash, got)
	}
}
================================================
FILE: geo/parse.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"reflect"
"strconv"
"strings"
"github.com/blevesearch/bleve/v2/util"
"github.com/blevesearch/geo/geojson"
)
// ExtractGeoPoint takes an arbitrary interface{} and tries its best to
// interpret it as a geo point. Supported formats:
//
//   - slice of length 2 (GeoJSON): first element lon, second element lat
//   - string: either "lat,lon" (two coordinates separated by a comma, with
//     surrounding white space trimmed) or a geohash of at most
//     geoHashMaxLength bytes
//   - map[string]interface{}: exact keys lat and lon or lng
//   - struct: exported fields whose lower-cased name starts with lat, lon
//     or lng
//   - any value satisfying the later and loner or lnger interfaces
//
// The formats are probed in the order listed and a later match overwrites
// an earlier one. For slices, maps and struct fields the values must be
// some sort of numeric-like thing: int/uint/float.
//
// success is true only when both a longitude and a latitude were found.
func ExtractGeoPoint(thing interface{}) (lon, lat float64, success bool) {
	var foundLon, foundLat bool

	thingVal := reflect.ValueOf(thing)
	if !thingVal.IsValid() {
		return lon, lat, false
	}

	thingTyp := thingVal.Type()

	// is it a slice
	if thingVal.Kind() == reflect.Slice {
		// must be length 2
		if thingVal.Len() == 2 {
			first := thingVal.Index(0)
			if first.CanInterface() {
				firstVal := first.Interface()
				lon, foundLon = util.ExtractNumericValFloat64(firstVal)
			}
			second := thingVal.Index(1)
			if second.CanInterface() {
				secondVal := second.Interface()
				lat, foundLat = util.ExtractNumericValFloat64(secondVal)
			}
		}
	}

	// is it a string
	if thingVal.Kind() == reflect.String {
		// Value.String works for every value of string kind, including
		// named string types, where asserting the interface to string
		// would panic.
		geoStr := thingVal.String()
		if strings.Contains(geoStr, ",") {
			// geo point with coordinates split by comma
			points := strings.Split(geoStr, ",")
			for i, point := range points {
				// trim any leading or trailing white spaces
				points[i] = strings.TrimSpace(point)
			}
			if len(points) == 2 {
				var err error
				lat, err = strconv.ParseFloat(points[0], 64)
				if err == nil {
					foundLat = true
				}
				lon, err = strconv.ParseFloat(points[1], 64)
				if err == nil {
					foundLon = true
				}
			}
		} else {
			// geohash
			if len(geoStr) <= geoHashMaxLength {
				lat, lon = DecodeGeoHash(geoStr)
				foundLat = true
				foundLon = true
			}
		}
	}

	// is it a map
	if l, ok := thing.(map[string]interface{}); ok {
		if lval, ok := l["lon"]; ok {
			lon, foundLon = util.ExtractNumericValFloat64(lval)
		} else if lval, ok := l["lng"]; ok {
			lon, foundLon = util.ExtractNumericValFloat64(lval)
		}
		if lval, ok := l["lat"]; ok {
			lat, foundLat = util.ExtractNumericValFloat64(lval)
		}
	}

	// now try reflection on struct fields
	if thingVal.Kind() == reflect.Struct {
		for i := 0; i < thingVal.NumField(); i++ {
			fieldName := thingTyp.Field(i).Name
			if strings.HasPrefix(strings.ToLower(fieldName), "lon") {
				if thingVal.Field(i).CanInterface() {
					fieldVal := thingVal.Field(i).Interface()
					lon, foundLon = util.ExtractNumericValFloat64(fieldVal)
				}
			}
			if strings.HasPrefix(strings.ToLower(fieldName), "lng") {
				if thingVal.Field(i).CanInterface() {
					fieldVal := thingVal.Field(i).Interface()
					lon, foundLon = util.ExtractNumericValFloat64(fieldVal)
				}
			}
			if strings.HasPrefix(strings.ToLower(fieldName), "lat") {
				if thingVal.Field(i).CanInterface() {
					fieldVal := thingVal.Field(i).Interface()
					lat, foundLat = util.ExtractNumericValFloat64(fieldVal)
				}
			}
		}
	}

	// last hope, some interfaces
	// lon
	if l, ok := thing.(loner); ok {
		lon = l.Lon()
		foundLon = true
	} else if l, ok := thing.(lnger); ok {
		lon = l.Lng()
		foundLon = true
	}
	// lat
	if l, ok := thing.(later); ok {
		lat = l.Lat()
		foundLat = true
	}

	return lon, lat, foundLon && foundLat
}
// various support interfaces which can be used to find lat/lon

// loner is satisfied by values exposing their longitude through Lon.
type loner interface {
Lon() float64
}
// later is satisfied by values exposing their latitude through Lat.
type later interface {
Lat() float64
}
// lnger is satisfied by values exposing their longitude through Lng.
type lnger interface {
Lng() float64
}
// GlueBytes primarily for quicker filtering of docvalues
// during the filtering phase.
var GlueBytes = []byte("##")
// GlueBytesOffset is the length of GlueBytes in bytes.
var GlueBytesOffset = len(GlueBytes)
// extractCoordinates interprets thing as a two-element slice holding
// longitude then latitude and returns them as []float64{lon, lat}.
// It returns nil when thing is not a slice of exactly two elements or when
// either element cannot be read as a number.
func extractCoordinates(thing interface{}) []float64 {
	val := reflect.ValueOf(thing)
	if !val.IsValid() || val.Kind() != reflect.Slice || val.Len() != 2 {
		return nil
	}

	var (
		lon, lat     float64
		okLon, okLat bool
	)
	if elem := val.Index(0); elem.CanInterface() {
		lon, okLon = util.ExtractNumericValFloat64(elem.Interface())
	}
	if elem := val.Index(1); elem.CanInterface() {
		lat, okLat = util.ExtractNumericValFloat64(elem.Interface())
	}

	if okLon && okLat {
		return []float64{lon, lat}
	}
	return nil
}
// extract2DCoordinates interprets thing as a slice of coordinate pairs.
// Elements that are not []interface{} or that do not yield a valid pair are
// skipped. It returns nil when thing is not a slice, and a non-nil
// (possibly empty) slice otherwise.
func extract2DCoordinates(thing interface{}) [][]float64 {
	val := reflect.ValueOf(thing)
	if !val.IsValid() || val.Kind() != reflect.Slice {
		return nil
	}

	points := make([][]float64, 0, 8)
	for i, n := 0, val.Len(); i < n; i++ {
		raw, isList := val.Index(i).Interface().([]interface{})
		if !isList {
			continue
		}
		if pt := extractCoordinates(raw); len(pt) == 2 {
			points = append(points, pt)
		}
	}
	return points
}
// extract3DCoordinates interprets thing as a slice of point lists. Elements
// that are not []interface{} or that contain no valid point are skipped.
// The result is nil when nothing was collected.
func extract3DCoordinates(thing interface{}) (c [][][]float64) {
	val := reflect.ValueOf(thing)
	if !val.IsValid() || val.Kind() != reflect.Slice {
		return nil
	}

	for i, n := 0, val.Len(); i < n; i++ {
		raw, isList := val.Index(i).Interface().([]interface{})
		if !isList {
			continue
		}
		if pts := extract2DCoordinates(raw); len(pts) > 0 {
			c = append(c, pts)
		}
	}
	return c
}
// extract4DCoordinates interprets thing as a slice whose elements are each
// handed to extract3DCoordinates. Every element contributes exactly one
// entry to the result, so an element without usable coordinates shows up as
// a nil entry. The result is nil when thing is not a slice or is empty.
func extract4DCoordinates(thing interface{}) (rv [][][][]float64) {
	val := reflect.ValueOf(thing)
	if !val.IsValid() || val.Kind() != reflect.Slice {
		return nil
	}

	for i, n := 0, val.Len(); i < n; i++ {
		rv = append(rv, extract3DCoordinates(val.Index(i).Interface()))
	}
	return rv
}
// ParseGeoShapeField looks for the "type" and "coordinates" entries of a
// map-like value.
//
// It returns the raw value stored under "coordinates" (nil if absent) and
// the lower-cased shape type. When thing is nil or not a map, both results
// are empty. A "type" entry that is not a string is ignored, leaving the
// shape type empty, rather than causing a panic. The returned error is
// always nil.
func ParseGeoShapeField(thing interface{}) (interface{}, string, error) {
	thingVal := reflect.ValueOf(thing)
	if !thingVal.IsValid() || thingVal.Kind() != reflect.Map {
		return nil, "", nil
	}

	var shape string
	var coordValue interface{}

	iter := thingVal.MapRange()
	for iter.Next() {
		switch iter.Key().String() {
		case "type":
			// documents are untrusted: only accept a string here.
			if name, ok := iter.Value().Interface().(string); ok {
				shape = name
			}
		case "coordinates":
			coordValue = iter.Value().Interface()
		}
	}

	return coordValue, strings.ToLower(shape), nil
}
// extractGeoShape reads the shape type of thing via ParseGeoShapeField and
// dispatches to ExtractCircle for circles and to ExtractGeoShapeCoordinates
// for everything else. A parse error yields (nil, false).
func extractGeoShape(thing interface{}) (*geojson.GeoShape, bool) {
	coords, shapeType, err := ParseGeoShapeField(thing)
	switch {
	case err != nil:
		return nil, false
	case shapeType == CircleType:
		return ExtractCircle(thing)
	default:
		return ExtractGeoShapeCoordinates(coords, shapeType)
	}
}
// ExtractGeometryCollection takes an interface{} and tries its best to
// interpret all the member geojson shapes within it.
//
// thing is expected to be a map with a "geometries" entry holding a list of
// shapes. Members that cannot be interpreted are skipped. The boolean is
// true when at least one member shape was extracted. A "geometries" entry
// that is not a slice or array (for example nil, a number or a map) is
// ignored instead of causing a reflection panic.
func ExtractGeometryCollection(thing interface{}) ([]*geojson.GeoShape, bool) {
	thingVal := reflect.ValueOf(thing)
	if !thingVal.IsValid() || thingVal.Kind() != reflect.Map {
		return nil, false
	}

	var rv []*geojson.GeoShape

	iter := thingVal.MapRange()
	for iter.Next() {
		if iter.Key().String() != "geometries" {
			continue
		}

		items := reflect.ValueOf(iter.Value().Interface())
		// Len and Index are only safe on list-like values.
		if kind := items.Kind(); kind != reflect.Slice && kind != reflect.Array {
			continue
		}

		for j := 0; j < items.Len(); j++ {
			if shape, found := extractGeoShape(items.Index(j).Interface()); found {
				rv = append(rv, shape)
			}
		}
	}

	return rv, len(rv) > 0
}
// ExtractCircle takes an interface{} and tries its best to
// interpret the center point coordinates and the radius for a
// given circle shape.
//
// thing is expected to be a map with a "coordinates" entry understood by
// ExtractGeoPoint and a string "radius" entry. The boolean is false when
// thing is nil, when the radius is present but not a string, when the
// coordinates cannot be interpreted, or when no center was found at all.
func ExtractCircle(thing interface{}) (*geojson.GeoShape, bool) {
	thingVal := reflect.ValueOf(thing)
	if !thingVal.IsValid() {
		return nil, false
	}

	rv := &geojson.GeoShape{
		Type:   CircleType,
		Center: make([]float64, 0, 2),
	}

	if thingVal.Kind() == reflect.Map {
		iter := thingVal.MapRange()
		for iter.Next() {
			switch iter.Key().String() {
			case "radius":
				// documents are untrusted: reject a non-string radius
				// instead of panicking on the type assertion.
				radius, ok := iter.Value().Interface().(string)
				if !ok {
					return nil, false
				}
				rv.Radius = radius
			case "coordinates":
				lng, lat, found := ExtractGeoPoint(iter.Value().Interface())
				if !found {
					return nil, false
				}
				rv.Center = append(rv.Center, lng, lat)
			}
		}
	}

	// a circle without a center is not a usable shape.
	if len(rv.Center) != 2 {
		return nil, false
	}

	return rv, true
}
// ExtractGeoShapeCoordinates takes an interface{} and tries its best to
// interpret the coordinates for any of the given geoshape typ like
// a point, multipoint, linestring, envelope, multilinestring, polygon or
// multipolygon.
//
// The boolean reports whether a valid shape was built. Invalid input for a
// known type yields (nil, false); an unknown type yields a shape that only
// carries the type, together with false.
func ExtractGeoShapeCoordinates(coordValue interface{},
	typ string) (*geojson.GeoShape, bool) {
	shape := &geojson.GeoShape{Type: typ}

	switch typ {
	case PointType:
		pt := extractCoordinates(coordValue)
		// ignore the contents with invalid entry.
		if len(pt) < 2 {
			return nil, false
		}
		shape.Coordinates = [][][][]float64{{{pt}}}
		return shape, true

	case MultiPointType, LineStringType, EnvelopeType:
		pts := extract2DCoordinates(coordValue)
		count := len(pts)
		// ignore the contents with invalid entry: nothing usable at all,
		// an envelope without exactly two corners, or a linestring with
		// fewer than two points.
		if count == 0 ||
			(typ == EnvelopeType && count != 2) ||
			(typ == LineStringType && count < 2) {
			return nil, false
		}
		shape.Coordinates = [][][][]float64{{pts}}
		return shape, true

	case PolygonType, MultiLineStringType:
		lists := extract3DCoordinates(coordValue)
		// ignore the contents with invalid entry.
		if len(lists) == 0 {
			return nil, false
		}
		// only the first list is checked for a minimum number of points.
		minPoints := 2
		if typ == PolygonType {
			minPoints = 3
		}
		if len(lists[0]) < minPoints {
			return nil, false
		}
		shape.Coordinates = [][][][]float64{lists}
		return shape, true

	case MultiPolygonType:
		polygons := extract4DCoordinates(coordValue)
		// ignore the contents with invalid entry; only the first loop of
		// the first polygon is checked for a minimum number of points.
		if len(polygons) == 0 || len(polygons[0]) == 0 ||
			len(polygons[0][0]) < 3 {
			return nil, false
		}
		shape.Coordinates = polygons
		return shape, true
	}

	return shape, false
}
================================================
FILE: geo/parse_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"encoding/json"
"reflect"
"testing"
)
// TestExtractGeoPoint feeds ExtractGeoPoint maps, structs, interface
// implementations, slices and nil, and checks the returned longitude,
// latitude and success flag against the expected values.
func TestExtractGeoPoint(t *testing.T) {
tests := []struct {
in interface{}
lon float64
lat float64
success bool
}{
// values are ints
{
in: map[string]interface{}{
"lat": 5,
"lon": 5,
},
lon: 5,
lat: 5,
success: true,
},
// values are uints
{
in: map[string]interface{}{
"lat": uint(5),
"lon": uint(5),
},
lon: 5,
lat: 5,
success: true,
},
// values float64 as with parsed JSON
{
in: map[string]interface{}{
"lat": 5.0,
"lon": 5.0,
},
lon: 5,
lat: 5,
success: true,
},
// values are bool (not supported)
{
in: map[string]interface{}{
"lat": true,
"lon": false,
},
lon: 0,
lat: 0,
success: false,
},
// using lng variant of lon
{
in: map[string]interface{}{
"lat": 5.0,
"lng": 5.0,
},
lon: 5,
lat: 5,
success: true,
},
// using struct
{
in: struct {
Lon float64
Lat float64
}{
Lon: 3.0,
Lat: 7.5,
},
lon: 3.0,
lat: 7.5,
success: true,
},
// struct with lng alternate
{
in: struct {
Lng float64
Lat float64
}{
Lng: 3.0,
Lat: 7.5,
},
lon: 3.0,
lat: 7.5,
success: true,
},
// test going through interface
{
in: &s11{
lon: 4.0,
lat: 6.9,
},
lon: 4.0,
lat: 6.9,
success: true,
},
// test going through interface with lng variant
{
in: &s12{
lng: 4.0,
lat: 6.9,
},
lon: 4.0,
lat: 6.9,
success: true,
},
// try GeoJSON slice
{
in: []interface{}{3.4, 5.9},
lon: 3.4,
lat: 5.9,
success: true,
},
// try GeoJSON slice too long
{
in: []interface{}{3.4, 5.9, 9.4},
lon: 0,
lat: 0,
success: false,
},
// slice of floats
{
in: []float64{3.4, 5.9},
lon: 3.4,
lat: 5.9,
success: true,
},
// values are nil (not supported)
{
in: map[string]interface{}{
"lat": nil,
"lon": nil,
},
lon: 0,
lat: 0,
success: false,
},
// input is nil
{
in: nil,
lon: 0,
lat: 0,
success: false,
},
}
// every case checks the success flag and both coordinates, using exact
// float comparison.
for _, test := range tests {
lon, lat, success := ExtractGeoPoint(test.in)
if success != test.success {
t.Errorf("expected extract geo point %t, got %t for %v", test.success, success, test.in)
}
if lon != test.lon {
t.Errorf("expected lon %f, got %f for %v", test.lon, lon, test.in)
}
if lat != test.lat {
t.Errorf("expected lat %f, got %f for %v", test.lat, lat, test.in)
}
}
}
// s11 is a test fixture whose coordinates are only reachable through the
// Lon and Lat methods; its fields are unexported.
type s11 struct {
	lon float64
	lat float64
}

// Lon returns the stored longitude.
func (pt *s11) Lon() float64 { return pt.lon }

// Lat returns the stored latitude.
func (pt *s11) Lat() float64 { return pt.lat }
// s12 is a test fixture whose coordinates are only reachable through the
// Lng and Lat methods; its fields are unexported.
type s12 struct {
	lng float64
	lat float64
}

// Lng returns the stored longitude.
func (pt *s12) Lng() float64 { return pt.lng }

// Lat returns the stored latitude.
func (pt *s12) Lat() float64 { return pt.lat }
// TestExtractGeoShape runs extractGeoShape over GeoJSON-like maps for
// points, multipoints, linestrings, envelopes and circles. For each case it
// checks the success flag and, on success, the resulting type, the
// coordinates and (when a center is expected) the circle center.
func TestExtractGeoShape(t *testing.T) {
tests := []struct {
in interface{}
resTyp string
coordinates [][][][]float64
center []float64
success bool
}{
// valid point slice
{
in: map[string]interface{}{
"coordinates": []interface{}{3.4, 5.9},
"type": "Point",
},
resTyp: "point",
coordinates: [][][][]float64{{{{3.4, 5.9}}}},
success: true,
},
// invalid point slice
{
in: map[string]interface{}{
"coordinates": []interface{}{3.4},
"type": "point"},
resTyp: "point",
coordinates: nil,
success: false,
},
// valid multipoint slice containing single point
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 5.9}},
"type": "multipoint"},
resTyp: "multipoint",
coordinates: [][][][]float64{{{{3.4, 5.9}}}},
success: true,
},
// valid multipoint slice
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 5.9}, {6.7, 9.8}},
"type": "multipoint"},
resTyp: "multipoint",
coordinates: [][][][]float64{{{{3.4, 5.9}, {6.7, 9.8}}}},
success: true,
},
// valid multipoint slice containing one invalid entry
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 5.9}, {6.7}},
"type": "multipoint"},
resTyp: "multipoint",
coordinates: [][][][]float64{{{{3.4, 5.9}}}},
success: true,
},
// invalid multipoint slice
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4}},
"type": "multipoint"},
resTyp: "multipoint",
coordinates: nil,
success: false,
},
// valid linestring slice
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 4.4}, {8.4, 9.4}},
"type": "linestring"},
resTyp: "linestring",
coordinates: [][][][]float64{{{{3.4, 4.4}, {8.4, 9.4}}}},
success: true,
},
// valid linestring slice
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 4.4}, {8.4, 9.4}, {10.1, 12.3}},
"type": "linestring"},
resTyp: "linestring",
coordinates: [][][][]float64{{{{3.4, 4.4}, {8.4, 9.4}, {10.1, 12.3}}}},
success: true,
},
// invalid linestring slice with single entry
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 4.4}},
"type": "linestring"},
resTyp: "linestring",
coordinates: nil,
success: false,
},
// invalid linestring slice with wrong parenthesis
{
in: map[string]interface{}{
"coordinates": [][][]interface{}{{{3.4, 4.4}, {8.4, 9.4}}},
"type": "linestring"},
resTyp: "linestring",
coordinates: nil,
success: false,
},
// valid envelope
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 4.4}, {8.4, 9.4}},
"type": "envelope"},
resTyp: "envelope",
coordinates: [][][][]float64{{{{3.4, 4.4}, {8.4, 9.4}}}},
success: true,
},
// invalid envelope
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 4.4}},
"type": "envelope"},
resTyp: "envelope",
coordinates: nil,
success: false,
},
// invalid envelope
{
in: map[string]interface{}{
"coordinates": [][][]interface{}{{{3.4, 4.4}, {8.4, 9.4}}},
"type": "envelope"},
resTyp: "envelope",
coordinates: nil,
success: false,
},
// invalid envelope with >2 vertices
{
in: map[string]interface{}{
"coordinates": [][]interface{}{{3.4, 4.4}, {5.6, 6.4}, {7.4, 7.4}},
"type": "envelope"},
resTyp: "envelope",
coordinates: nil,
success: false,
},
// valid circle
{
in: map[string]interface{}{
"coordinates": []interface{}{4.4, 5.0},
"radius": "200m",
"type": "circle"},
resTyp: "circle",
center: []float64{4.4, 5.0},
success: true,
},
// invalid circle
{
in: map[string]interface{}{
"coordinates": []interface{}{4.4},
"radius": "200m",
"type": "circle"},
resTyp: "circle",
success: false,
},
}
// the result is only inspected when extraction succeeded; failed cases
// check the success flag alone.
for _, test := range tests {
res, success := extractGeoShape(test.in)
if success != test.success {
t.Errorf("expected extract geo point: %t, got: %t for: %v", test.success, success, test.in)
}
if success && res.Type != test.resTyp {
t.Errorf("expected shape type: %v, got: %v for input: %v", test.resTyp, res.Type, test.in)
}
if success && !reflect.DeepEqual(test.coordinates, res.Coordinates) {
t.Errorf("expected result %+v, got %+v for %v", test.coordinates, res.Coordinates, test.in)
}
if success && test.center != nil && !reflect.DeepEqual(test.center, res.Center) {
t.Errorf("expected center %+v, got %+v for %v", test.center, res.Center, test.in)
}
}
}
// TestExtractGeoShapeCoordinates unmarshals JSON coordinate fixtures and
// passes them to ExtractGeoShapeCoordinates with a polygon or multipolygon
// type. It checks the ok flag and, on success, the resulting shape type.
// Several fixtures are deliberately malformed to make sure they are
// rejected rather than causing a panic.
func TestExtractGeoShapeCoordinates(t *testing.T) {
tests := []struct {
x []byte
typ string
expectOK bool
}{
{
x: []byte(`[
[
[77.58894681930542,12.976498523818783],
[77.58677959442139,12.974533005048169],
[77.58894681930542,12.976498523818783]
]
]`),
typ: PolygonType,
expectOK: true,
},
{ // Invalid construct, but handled
x: []byte(`[
[
{"lon":77.58894681930542,"lat":12.976498523818783},
{"lon":77.58677959442139,"lat":12.974533005048169},
{"lon":77.58894681930542,"lat":12.976498523818783}
]
]`),
typ: PolygonType,
expectOK: false,
},
{ // Invalid construct causes panic (within extract3DCoordinates), fix MB-65807
x: []byte(`{
"coordinates": [
[77.58894681930542,12.976498523818783],
[77.58677959442139,12.974533005048169],
[77.58894681930542,12.976498523818783]
]
}`),
typ: PolygonType,
expectOK: false,
},
{
x: []byte(`[
[
[
[-0.163421630859375,51.531600743186644],
[-0.15277862548828125,51.52455221546295],
[-0.15895843505859375,51.53693981046689],
[-0.163421630859375,51.531600743186644]
]
],
[
[
[-0.1902008056640625,51.5091698216777],
[-0.1599884033203125,51.51322956905176],
[-0.1902008056640625,51.5091698216777]
]
]
]`),
typ: MultiPolygonType,
expectOK: true,
},
{ // Invalid construct causes panic (within extract3DCoordinates), fix MB-65807
x: []byte(`[
{
"coordinates": [
[-0.163421630859375,51.531600743186644],
[-0.15277862548828125,51.52455221546295],
[-0.15895843505859375,51.53693981046689],
[-0.163421630859375,51.531600743186644]
]
},
{
"coordinates": [
[-0.1902008056640625,51.5091698216777],
[-0.1599884033203125,51.51322956905176],
[-0.1902008056640625,51.5091698216777]
]
}
]`),
typ: MultiPolygonType,
expectOK: false,
},
}
// failures are reported with the 1-based position of the case.
for i := range tests {
var x interface{}
if err := json.Unmarshal(tests[i].x, &x); err != nil {
t.Fatalf("[%d] JSON err: %v", i+1, err)
}
res, ok := ExtractGeoShapeCoordinates(x, tests[i].typ)
if ok != tests[i].expectOK {
t.Errorf("[%d] expected ok %t, got %t", i+1, tests[i].expectOK, ok)
}
if ok && res.Type != tests[i].typ {
t.Errorf("[%d] expected type %s, got %s", i+1, tests[i].typ, res.Type)
}
}
}
================================================
FILE: geo/sloppy.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"math"
)
var earthDiameterPerLatitude []float64
const (
radiusTabsSize = (1 << 10) + 1
radiusDelta = (math.Pi / 2) / (radiusTabsSize - 1)
radiusIndexer = 1 / radiusDelta
)
func init() {
// initializes the tables used for the sloppy math functions
// earth radius
a := 6378137.0
b := 6356752.31420
a2 := a * a
b2 := b * b
earthDiameterPerLatitude = make([]float64, radiusTabsSize)
earthDiameterPerLatitude[0] = 2.0 * a / 1000
earthDiameterPerLatitude[radiusTabsSize-1] = 2.0 * b / 1000
for i := 1; i < radiusTabsSize-1; i++ {
lat := math.Pi * float64(i) / (2*radiusTabsSize - 1)
one := math.Pow(a2*math.Cos(lat), 2)
two := math.Pow(b2*math.Sin(lat), 2)
three := math.Pow(float64(a)*math.Cos(lat), 2)
four := math.Pow(b*math.Sin(lat), 2)
radius := math.Sqrt((one + two) / (three + four))
earthDiameterPerLatitude[i] = 2 * radius / 1000
}
}
// earthDiameter returns an estimation of the earth's diameter at the specified
// latitude in kilometers
func earthDiameter(lat float64) float64 {
index := math.Mod(math.Abs(lat)*radiusIndexer+0.5, float64(len(earthDiameterPerLatitude)))
if math.IsNaN(index) {
return 0
}
return earthDiameterPerLatitude[int(index)]
}
================================================
FILE: geo/versus_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package geo
import (
"testing"
)
// This test basically confirms the dimensions of the
// bounded box computed between the DecodeGeoHash method
// and the popular implementation from
// https://github.com/mmcloughlin/geohash.
// The DecodeGeoHash method returns the centre of the rectangle
// rather than returning the box dimensions.
// This test verifies that the returned rectangle centre matches
// the centre for the box dimensions defined in the original
// implementation tests here:
// https://github.com/mmcloughlin/geohash/blob/master/decodecases_test.go
func TestDecodeGeoHashVersus(t *testing.T) {
tests := []struct {
hash string
box []float64
}{
{"91rc", []float64{7.20703125, 7.3828125, -124.1015625, -123.75}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"0fuz", []float64{-73.30078125, -73.125, -139.5703125, -139.21875}},
{"dwfcndf", []float64{38.1596374512, 38.1610107422, -63.3444213867, -63.3430480957}},
{"2z7", []float64{-4.21875, -2.8125, -142.03125, -140.625}},
{"7spw2w", []float64{-21.3684082031, -21.3629150391, -11.9311523438, -11.9201660156}},
{"eq", []float64{33.75, 39.375, -33.75, -22.5}},
{"mgff0", []float64{-23.5546875, -23.5107421875, 82.6171875, 82.6611328125}},
{"dp7k386jtk0", []float64{41.5306591988, 41.5306605399, -85.3607976437, -85.3607963026}},
{"pjb", []float64{-57.65625, -56.25, 135.0, 136.40625}},
{"jkc7uh9", []float64{-62.5973510742, -62.5959777832, 58.184967041, 58.186340332}},
{"1gdp9", []float64{-68.994140625, -68.9501953125, -98.3935546875, -98.349609375}},
{"z9yj14mmnxte", []float64{55.7359149121, 55.7359150797, 165.988941416, 165.988941751}},
{"2brk", []float64{-42.890625, -42.71484375, -136.0546875, -135.703125}},
{"dhv5t2qh59", []float64{27.3360496759, 27.3360550404, -82.7296471596, -82.7296364307}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"3fgd9k15b5k", []float64{-29.0691630542, -29.0691617131, -96.2718147039, -96.2718133628}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"b4", []float64{56.25, 61.875, -180.0, -168.75}},
{"sb38", []float64{1.40625, 1.58203125, 35.859375, 36.2109375}},
{"puqeug", []float64{-65.4180908203, -65.4125976562, 178.099365234, 178.110351562}},
{"45", []float64{-73.125, -67.5, -90.0, -78.75}},
{"34b", []float64{-29.53125, -28.125, -135.0, -133.59375}},
{"tqb8jzn9dfqn", []float64{38.0074727163, 38.0074728839, 57.2148630023, 57.2148633376}},
{"9x", []float64{39.375, 45.0, -112.5, -101.25}},
{"tybf7", []float64{38.3642578125, 38.408203125, 79.9365234375, 79.98046875}},
{"9nc", []float64{37.96875, 39.375, -133.59375, -132.1875}},
{"pp21", []float64{-49.04296875, -48.8671875, 135.0, 135.3515625}},
{"s6wjfu76v", []float64{15.0970602036, 15.0971031189, 19.8130273819, 19.8130702972}},
{"wxh8ped7", []float64{39.3947410583, 39.3949127197, 119.160804749, 119.161148071}},
{"8gr", []float64{18.28125, 19.6875, -136.40625, -135.0}},
{"ug6hf", []float64{64.1162109375, 64.16015625, 36.650390625, 36.6943359375}},
{"pb", []float64{-90.0, -84.375, 168.75, 180.0}},
{"nmhvpv", []float64{-60.9686279297, -60.9631347656, 108.270263672, 108.28125}},
{"rxgthm", []float64{-0.499877929688, -0.494384765625, 162.608642578, 162.619628906}},
{"mj8t", []float64{-13.18359375, -13.0078125, 45.703125, 46.0546875}},
{"rkvw", []float64{-17.2265625, -17.05078125, 153.984375, 154.3359375}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"u4ryw22k", []float64{58.8008880615, 58.8010597229, 11.1734390259, 11.1737823486}},
{"96bf6jfr", []float64{15.8970451355, 15.8972167969, -122.60433197, -122.603988647}},
{"ubhnbn2n1jvj", []float64{46.2219173647, 46.2219175324, 39.3750496209, 39.3750499561}},
{"3gmczz", []float64{-26.3726806641, -26.3671875, -92.8234863281, -92.8125}},
{"yb4jh3u7px0", []float64{45.8890718222, 45.8890731633, 126.75542593, 126.755427271}},
{"9ex7rkbw1duf", []float64{20.2859266475, 20.2859268151, -101.985326596, -101.98532626}},
{"xrkyg3mj", []float64{41.9754981995, 41.9756698608, 153.079376221, 153.079719543}},
{"z4hn2yfzryjq", []float64{57.3869894072, 57.3869895749, 140.662075169, 140.662075505}},
{"x357", []float64{6.15234375, 6.328125, 150.8203125, 151.171875}},
{"v3pew", []float64{51.240234375, 51.2841796875, 67.060546875, 67.1044921875}},
{"j1", []float64{-84.375, -78.75, 45.0, 56.25}},
{"ec1bkph03", []float64{5.70744037628, 5.70748329163, -8.60774517059, -8.60770225525}},
{"q4t85", []float64{-30.9375, -30.8935546875, 97.8662109375, 97.91015625}},
{"k26z8hf", []float64{-42.2492980957, -42.2479248047, 15.119934082, 15.121307373}},
{"p6fyq2u63", []float64{-73.4281110764, -73.428068161, 150.397725105, 150.397768021}},
{"uqu5w2yu", []float64{83.5887908936, 83.5889625549, 17.1589279175, 17.1592712402}},
{"terbkv", []float64{18.3526611328, 18.3581542969, 78.6071777344, 78.6181640625}},
{"8v2nwkp", []float64{30.6958007812, 30.6971740723, -145.96572876, -145.964355469}},
{"rbktxk2enhr", []float64{-42.6030693948, -42.6030680537, 175.397682041, 175.397683382}},
{"r1pnfct8", []float64{-38.1802368164, -38.180065155, 144.97215271, 144.972496033}},
{"rcmxg", []float64{-36.6064453125, -36.5625, 176.616210938, 176.66015625}},
{"jqw7xjdt8", []float64{-52.7911090851, -52.7910661697, 65.350112915, 65.3501558304}},
{"7mt737xd", []float64{-13.4716415405, -13.4714698792, -26.3019561768, -26.301612854}},
{"2dprx5rg57es", []float64{-32.4132534117, -32.413253244, -146.986283138, -146.986282803}},
{"fpr5d98ws0", []float64{86.40583992, 86.4058452845, -80.0455284119, -80.045517683}},
{"z4cmm3", []float64{61.3970947266, 61.4025878906, 136.988525391, 136.999511719}},
{"gz3b8j", []float64{85.8966064453, 85.9020996094, -8.7890625, -8.77807617188}},
{"svfztv42f2k", []float64{33.6897052824, 33.6897066236, 37.8730648756, 37.8730662167}},
{"z6f3yb6rum", []float64{60.7790976763, 60.7791030407, 149.713965654, 149.713976383}},
{"wvqv397", []float64{30.4609680176, 30.4623413086, 133.312225342, 133.313598633}},
{"r0ce65wshm", []float64{-40.1900213957, -40.1900160313, 137.206374407, 137.206385136}},
{"crqvbtfbjb09", []float64{86.8235780485, 86.8235782161, -114.23181586, -114.231815524}},
{"ptz0vc5z87", []float64{-57.5176173449, -57.5176119804, 167.601596117, 167.601606846}},
{"x806byp", []float64{0.516357421875, 0.517730712891, 157.894134521, 157.895507812}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"kb23vjn10", []float64{-43.2584953308, -43.2584524155, 34.3295288086, 34.3295717239}},
{"n82kv", []float64{-87.7587890625, -87.71484375, 113.071289062, 113.115234375}},
{"dkhzmmqfg", []float64{23.8037252426, 23.803768158, -71.830201149, -71.8301582336}},
{"y1v", []float64{54.84375, 56.25, 97.03125, 98.4375}},
{"q977h9", []float64{-37.4359130859, -37.4304199219, 117.268066406, 117.279052734}},
{"sdffzu5qzu", []float64{15.9753012657, 15.9753066301, 26.7125594616, 26.7125701904}},
{"n3q", []float64{-82.96875, -81.5625, 109.6875, 111.09375}},
{"fr99zgs7e", []float64{87.5149440765, 87.5149869919, -76.2940835953, -76.2940406799}},
{"4d9cv4cbh", []float64{-75.6147766113, -75.614733696, -64.8167610168, -64.8167181015}},
{"np896wq5", []float64{-47.557926178, -47.5577545166, 90.8212280273, 90.8215713501}},
{"45", []float64{-73.125, -67.5, -90.0, -78.75}},
{"c5srsy", []float64{66.0388183594, 66.0443115234, -128.814697266, -128.803710938}},
{"vcshr", []float64{54.1845703125, 54.228515625, 84.6826171875, 84.7265625}},
{"3zbbs1wnn5", []float64{-1.30907356739, -1.30906820297, -100.011034012, -100.011023283}},
{"jtdvxn", []float64{-58.0627441406, -58.0572509766, 71.6748046875, 71.6857910156}},
{"yz27wu0e", []float64{86.4189720154, 86.4191436768, 124.398880005, 124.399223328}},
{"utb0ck65h9", []float64{77.4994522333, 77.4994575977, 22.5578713417, 22.5578820705}},
{"1t1dvq5jj", []float64{-61.3577842712, -61.3577413559, -110.15557766, -110.155534744}},
{"yzwgy5hvwz", []float64{87.8641408682, 87.8641462326, 133.512672186, 133.512682915}},
{"8dx38y6vby", []float64{14.3615233898, 14.3615287542, -147.267919779, -147.26790905}},
{"9bh", []float64{0.0, 1.40625, -95.625, -94.21875}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"x14ve", []float64{6.591796875, 6.6357421875, 138.999023438, 139.04296875}},
{"3603v", []float64{-33.4423828125, -33.3984375, -123.178710938, -123.134765625}},
{"bsuje4cn6", []float64{72.7017259598, 72.7017688751, -151.741704941, -151.741662025}},
{"9nnut36epqhn", []float64{34.5484302565, 34.5484304242, -125.273349881, -125.273349546}},
{"7q27jg", []float64{-9.29992675781, -9.29443359375, -33.1457519531, -33.134765625}},
{"933zzd6", []float64{8.40591430664, 8.40728759766, -120.956726074, -120.955352783}},
{"5743y", []float64{-72.8173828125, -72.7734375, -30.322265625, -30.2783203125}},
{"7m94uc9", []float64{-13.5708618164, -13.5694885254, -32.1336364746, -32.1322631836}},
{"780", []float64{-45.0, -43.59375, -22.5, -21.09375}},
{"sqpqx2w0hh", []float64{34.8953461647, 34.8953515291, 21.7723274231, 21.7723381519}},
{"1ep", []float64{-73.125, -71.71875, -102.65625, -101.25}},
{"k0j", []float64{-45.0, -43.59375, 7.03125, 8.4375}},
{"z1zgr0g440", []float64{55.4195022583, 55.4195076227, 146.210260391, 146.21027112}},
{"681bf", []float64{-44.8681640625, -44.82421875, -64.951171875, -64.9072265625}},
{"dn6j", []float64{36.03515625, 36.2109375, -87.1875, -86.8359375}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"t1m", []float64{7.03125, 8.4375, 52.03125, 53.4375}},
{"9qkshh67xe3", []float64{35.8833391964, 35.8833405375, -117.242680639, -117.242679298}},
{"4507gyj", []float64{-72.4328613281, -72.4314880371, -89.476776123, -89.475402832}},
{"bb844h", []float64{48.1860351562, 48.1915283203, -146.162109375, -146.151123047}},
{"trn6yz", []float64{39.8968505859, 39.90234375, 65.3356933594, 65.3466796875}},
{"vb99", []float64{47.98828125, 48.1640625, 80.859375, 81.2109375}},
{"dxpbksmed", []float64{39.4428920746, 39.4429349899, -56.3961696625, -56.3961267471}},
{"zc", []float64{50.625, 56.25, 168.75, 180.0}},
{"3webp3s6hdeb", []float64{-8.42890352011, -8.42890335247, -106.901924349, -106.901924014}},
{"8qg", []float64{37.96875, 39.375, -164.53125, -163.125}},
{"fgwzk", []float64{65.9619140625, 66.005859375, -46.58203125, -46.5380859375}},
{"1q2mxvd", []float64{-53.8467407227, -53.8453674316, -123.055114746, -123.053741455}},
{"sd", []float64{11.25, 16.875, 22.5, 33.75}},
{"8hhwv", []float64{23.6865234375, 23.73046875, -173.452148438, -173.408203125}},
{"1bw", []float64{-87.1875, -85.78125, -92.8125, -91.40625}},
{"66zkh0ex724", []float64{-28.824133873, -28.8241325319, -68.3739575744, -68.3739562333}},
{"w6qpy8", []float64{14.0185546875, 14.0240478516, 109.973144531, 109.984130859}},
{"nux13fvfr", []float64{-64.4522809982, -64.4522380829, 133.678851128, 133.678894043}},
{"cj2", []float64{74.53125, 75.9375, -135.0, -133.59375}},
{"1qrumeqp", []float64{-54.0776252747, -54.0774536133, -112.601623535, -112.601280212}},
{"hhm0", []float64{-66.09375, -65.91796875, 7.03125, 7.3828125}},
{"50cp37u6w86", []float64{-84.4858060777, -84.4858047366, -43.5327002406, -43.5326988995}},
{"0vjjfe5w", []float64{-60.8467483521, -60.8465766907, -139.1040802, -139.103736877}},
{"xm67", []float64{30.05859375, 30.234375, 149.4140625, 149.765625}},
{"1jmnqz1", []float64{-59.3316650391, -59.330291748, -127.67074585, -127.669372559}},
{"jjfh69u78", []float64{-56.8989658356, -56.8989229202, 47.9281997681, 47.9282426834}},
{"9xvnjtj3ytr", []float64{44.6762318909, 44.676233232, -105.219552666, -105.219551325}},
{"ebwk44", []float64{3.52661132812, 3.53210449219, -2.373046875, -2.36206054688}},
{"xej0p", []float64{16.875, 16.9189453125, 164.838867188, 164.8828125}},
{"hm4m", []float64{-60.99609375, -60.8203125, 14.4140625, 14.765625}},
{"71d1uhzq", []float64{-36.2277603149, -36.2275886536, -42.0017623901, -42.0014190674}},
{"u7d8cgc9h4w", []float64{64.8401203752, 64.8401217163, 14.8447689414, 14.8447702825}},
{"zwud", []float64{83.3203125, 83.49609375, 163.828125, 164.1796875}},
{"p4nbh7wnwsb", []float64{-78.7296326458, -78.7296313047, 144.687473774, 144.687475115}},
{"ev", []float64{28.125, 33.75, -11.25, 0.0}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"ytj8jhg3vmke", []float64{73.1514216028, 73.1514217705, 120.458796099, 120.458796434}},
{"xn", []float64{33.75, 39.375, 135.0, 146.25}},
{"1t7f", []float64{-60.1171875, -59.94140625, -107.2265625, -106.875}},
{"wjt", []float64{30.9375, 32.34375, 97.03125, 98.4375}},
{"f7j3", []float64{62.05078125, 62.2265625, -71.3671875, -71.015625}},
{"62bd4mvcg", []float64{-40.3978013992, -40.3977584839, -77.9399728775, -77.9399299622}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"577epg1nh0be", []float64{-71.1738922633, -71.1738920957, -28.4860032052, -28.4860028699}},
{"pvyz2qtjw6kc", []float64{-56.3451739959, -56.3451738283, 178.260314874, 178.26031521}},
{"2hrf6fbj", []float64{-20.6822776794, -20.6821060181, -168.980712891, -168.980369568}},
{"z1yz7p6dc3h8", []float64{56.1584669352, 56.1584671028, 144.627516344, 144.627516679}},
{"24v5r460td50", []float64{-28.9475047588, -28.9475045912, -172.658146173, -172.658145837}},
{"rdtnyvudc4mu", []float64{-29.7189060599, -29.7189058922, 164.834111296, 164.834111631}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"e7he", []float64{17.40234375, 17.578125, -27.421875, -27.0703125}},
{"9yrkg", []float64{35.9912109375, 36.03515625, -90.9228515625, -90.87890625}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"yppvw0", []float64{85.341796875, 85.3472900391, 101.162109375, 101.173095703}},
{"n3876wh", []float64{-80.9582519531, -80.9568786621, 101.716918945, 101.718292236}},
{"m1vjqf4", []float64{-34.2224121094, -34.2210388184, 52.3306274414, 52.3320007324}},
{"kev", []float64{-23.90625, -22.5, 29.53125, 30.9375}},
{"4qc", []float64{-52.03125, -50.625, -77.34375, -75.9375}},
{"5f4", []float64{-78.75, -77.34375, -8.4375, -7.03125}},
{"ug", []float64{61.875, 67.5, 33.75, 45.0}},
{"g5", []float64{61.875, 67.5, -45.0, -33.75}},
{"7wvx", []float64{-5.80078125, -5.625, -14.765625, -14.4140625}},
{"rx", []float64{-5.625, 0.0, 157.5, 168.75}},
{"jdg0t21qzr", []float64{-74.4421631098, -74.4421577454, 71.9514906406, 71.9515013695}},
{"606yg", []float64{-42.4072265625, -42.36328125, -86.0009765625, -85.95703125}},
{"nxt6zwhgqd", []float64{-47.2955739498, -47.2955685854, 120.219204426, 120.219215155}},
{"e71g7w8dr", []float64{17.482380867, 17.4824237823, -31.1342668533, -31.134223938}},
{"n6u", []float64{-74.53125, -73.125, 106.875, 108.28125}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"zfcehd0d3", []float64{61.0074663162, 61.0075092316, 171.057858467, 171.057901382}},
{"hgx5efc24r5e", []float64{-69.68212137, -69.6821212023, 43.760362789, 43.7603631243}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"q", []float64{-45.0, 0.0, 90.0, 135.0}},
{"zbzg87qu07g", []float64{49.8525439203, 49.8525452614, 179.668708295, 179.668709636}},
{"02urcn", []float64{-84.3859863281, -84.3804931641, -162.729492188, -162.718505859}},
{"p4", []float64{-78.75, -73.125, 135.0, 146.25}},
{"j6xjzcpsk78", []float64{-74.9205163121, -74.920514971, 66.4448082447, 66.4448095858}},
{"svchwhvd08d", []float64{33.1612041593, 33.1612055004, 35.4274991155, 35.4275004566}},
{"yughcgqek2", []float64{72.5721216202, 72.5721269846, 128.054763079, 128.054773808}},
{"18tc1b", []float64{-87.01171875, -87.0062255859, -104.337158203, -104.326171875}},
{"es43c", []float64{22.8076171875, 22.8515625, -19.2919921875, -19.248046875}},
{"zdcephk", []float64{61.0194396973, 61.0208129883, 159.922485352, 159.923858643}},
{"2zqh", []float64{-3.515625, -3.33984375, -137.8125, -137.4609375}},
{"sh9bfh31", []float64{25.4678535461, 25.4680252075, 2.55020141602, 2.55054473877}},
{"62vfgv", []float64{-40.2703857422, -40.2648925781, -70.4992675781, -70.48828125}},
{"u2yn4", []float64{50.2734375, 50.3173828125, 19.775390625, 19.8193359375}},
{"qx5wuf", []float64{-4.42749023438, -4.42199707031, 117.630615234, 117.641601562}},
{"9y163", []float64{34.1455078125, 34.189453125, -99.4482421875, -99.404296875}},
{"7ygu094y", []float64{-6.32160186768, -6.3214302063, -5.95081329346, -5.9504699707}},
{"nkfj9vxh9e6", []float64{-62.2834508121, -62.283449471, 104.149084389, 104.14908573}},
{"n220y9m", []float64{-88.4550476074, -88.4536743164, 101.542510986, 101.543884277}},
{"472p7k4d", []float64{-70.4220199585, -70.4218482971, -78.6037445068, -78.6034011841}},
{"7p", []float64{-5.625, 0.0, -45.0, -33.75}},
{"b1r1n2zp", []float64{52.2123527527, 52.2125244141, -169.87197876, -169.871635437}},
{"neu8fxsk8k4", []float64{-68.7324213982, -68.7324200571, 118.943838179, 118.94383952}},
{"m33fn", []float64{-37.6171875, -37.5732421875, 58.974609375, 59.0185546875}},
{"u6z5je", []float64{61.0125732422, 61.0180664062, 21.3354492188, 21.3464355469}},
{"5e6csnf", []float64{-71.4179992676, -71.4166259766, -18.454284668, -18.452911377}},
{"7f7k4", []float64{-31.640625, -31.5966796875, -6.591796875, -6.5478515625}},
{"ykd07ytm4", []float64{70.3930091858, 70.3930521011, 104.23459053, 104.234633446}},
{"ubyx97", []float64{50.5535888672, 50.5590820312, 42.9455566406, 42.9565429688}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"ungw16", []float64{84.0344238281, 84.0399169922, 4.97680664062, 4.98779296875}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"bhfkepk0n", []float64{72.5495910645, 72.5496339798, -176.698350906, -176.698307991}},
{"d3", []float64{5.625, 11.25, -78.75, -67.5}},
{"wsenn", []float64{26.3671875, 26.4111328125, 116.982421875, 117.026367188}},
{"b4b6qpqk", []float64{60.9047698975, 60.9049415588, -179.376182556, -179.375839233}},
{"zwjx2wv3", []float64{80.0616645813, 80.0618362427, 165.263557434, 165.263900757}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"3sgq6", []float64{-17.1826171875, -17.138671875, -107.841796875, -107.797851562}},
{"9chbq", []float64{5.6689453125, 5.712890625, -94.306640625, -94.2626953125}},
{"37nehnbt", []float64{-27.5597190857, -27.5595474243, -114.432907104, -114.432563782}},
{"uhr5w71b", []float64{69.5379638672, 69.5381355286, 10.1208114624, 10.1211547852}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"81", []float64{5.625, 11.25, -180.0, -168.75}},
{"vyxn", []float64{82.6171875, 82.79296875, 88.59375, 88.9453125}},
{"73jvv", []float64{-38.3642578125, -38.3203125, -25.4443359375, -25.400390625}},
{"nmw1ysg2f23", []float64{-58.7286601961, -58.728658855, 109.977705628, 109.977706969}},
{"5sv6js2nphq", []float64{-62.9052887857, -62.9052874446, -14.8751798272, -14.8751784861}},
{"289", []float64{-42.1875, -40.78125, -156.09375, -154.6875}},
{"cq9", []float64{81.5625, 82.96875, -122.34375, -120.9375}},
{"mm", []float64{-16.875, -11.25, 56.25, 67.5}},
{"49378fm9v", []float64{-82.3408555984, -82.3408126831, -65.7014608383, -65.701417923}},
{"ee079be", []float64{17.492980957, 17.494354248, -22.0674133301, -22.0660400391}},
{"4m5cs3h", []float64{-61.6058349609, -61.6044616699, -73.2843017578, -73.2829284668}},
{"hpdre", []float64{-46.494140625, -46.4501953125, 3.2958984375, 3.33984375}},
{"06", []float64{-78.75, -73.125, -168.75, -157.5}},
{"3x3r4evkpp1q", []float64{-2.9669566825, -2.96695651487, -110.624812357, -110.624812022}},
{"95074fyh23t", []float64{17.4181875587, 17.4181888998, -134.51933071, -134.519329369}},
{"5tdj7sf", []float64{-58.1135559082, -58.1121826172, -19.5309448242, -19.5295715332}},
{"8r9z", []float64{43.41796875, 43.59375, -166.2890625, -165.9375}},
{"nc", []float64{-84.375, -78.75, 123.75, 135.0}},
{"t2mb", []float64{1.40625, 1.58203125, 64.3359375, 64.6875}},
{"hcs1g2vbkq0g", []float64{-81.2506873347, -81.250687167, 39.525902085, 39.5259024203}},
{"pduzpxt", []float64{-73.2595825195, -73.2582092285, 164.516143799, 164.51751709}},
{"mw9u700", []float64{-7.6904296875, -7.68905639648, 70.0927734375, 70.0941467285}},
{"9y0ttsfr", []float64{34.7440910339, 34.7442626953, -100.302085876, -100.301742554}},
{"ztmu17de4", []float64{75.2541160583, 75.2541589737, 165.644388199, 165.644431114}},
{"1dqmehx", []float64{-76.3522338867, -76.3508605957, -103.569488525, -103.568115234}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"6ydvkt6cp", []float64{-7.48563766479, -7.48559474945, -52.180981636, -52.1809387207}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"706tn", []float64{-42.71484375, -42.6708984375, -41.220703125, -41.1767578125}},
{"6z89j2x0gun", []float64{-2.63382196426, -2.63382062316, -55.3063800931, -55.306378752}},
{"mt1t", []float64{-15.99609375, -15.8203125, 69.609375, 69.9609375}},
{"95ypdkpcd", []float64{22.4343395233, 22.4343824387, -126.452894211, -126.452851295}},
{"nr1wc", []float64{-49.4384765625, -49.39453125, 103.403320312, 103.447265625}},
{"rb2uxf4", []float64{-42.7917480469, -42.7903747559, 170.148010254, 170.149383545}},
{"gzz5w20e1wk", []float64{89.2095328867, 89.2095342278, -1.13083541393, -1.13083407283}},
{"mtuddkh7my", []float64{-12.1942341328, -12.1942287683, 73.9330852032, 73.933095932}},
{"r15def4", []float64{-38.9245605469, -38.9231872559, 140.089416504, 140.090789795}},
{"9fwcy0d", []float64{14.3728637695, 14.3742370605, -91.491394043, -91.490020752}},
{"6vx9z80hkd", []float64{-13.7541425228, -13.7541371584, -45.3733420372, -45.3733313084}},
{"qqxt55re", []float64{-7.54022598267, -7.54005432129, 111.93901062, 111.939353943}},
{"4hreqy", []float64{-65.4895019531, -65.4840087891, -79.1564941406, -79.1455078125}},
{"9nv6", []float64{38.3203125, 38.49609375, -127.6171875, -127.265625}},
{"k980q", []float64{-36.5185546875, -36.474609375, 22.763671875, 22.8076171875}},
{"zd6qmdhsrtce", []float64{58.7666300498, 58.7666302174, 160.912265405, 160.91226574}},
{"ucz57k", []float64{55.4370117188, 55.4425048828, 43.7365722656, 43.7475585938}},
{"u5pq", []float64{62.9296875, 63.10546875, 10.1953125, 10.546875}},
{"fey0v9", []float64{66.2310791016, 66.2365722656, -58.8208007812, -58.8098144531}},
{"mdtggek9fmh5", []float64{-30.2601397969, -30.2601396292, 75.7460278273, 75.7460281625}},
{"y6nfy", []float64{56.7333984375, 56.77734375, 111.005859375, 111.049804688}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"33hz5xbsw", []float64{-38.1011867523, -38.101143837, -116.915559769, -116.915516853}},
{"2wnb71890np2", []float64{-11.1976110935, -11.1976109259, -147.875280194, -147.875279859}},
{"7d", []float64{-33.75, -28.125, -22.5, -11.25}},
{"71vsdbh", []float64{-34.365234375, -34.363861084, -37.1392822266, -37.1379089355}},
{"nk", []float64{-67.5, -61.875, 101.25, 112.5}},
{"pn6uks", []float64{-54.0747070312, -54.0692138672, 139.064941406, 139.075927734}},
{"t0", []float64{0.0, 5.625, 45.0, 56.25}},
{"ze7c4z3e", []float64{63.4973716736, 63.497543335, 162.896347046, 162.896690369}},
{"ujq6txghfjdv", []float64{75.0141208805, 75.0141210482, 9.03497111052, 9.0349714458}},
{"40muh9sfm7", []float64{-87.8819829226, -87.8819775581, -81.7095601559, -81.709549427}},
{"0y4kk1", []float64{-55.4974365234, -55.4919433594, -142.91015625, -142.899169922}},
{"q8btfscp1g", []float64{-39.7431975603, -39.7431921959, 113.314436674, 113.314447403}},
{"yg2mxt", []float64{64.2755126953, 64.2810058594, 124.431152344, 124.442138672}},
{"7r5kh", []float64{-4.921875, -4.8779296875, -29.00390625, -28.9599609375}},
{"cvg6t", []float64{77.783203125, 77.8271484375, -96.4599609375, -96.416015625}},
{"msj4q7e3db", []float64{-22.0850086212, -22.0850032568, 74.8104894161, 74.810500145}},
{"1452n4", []float64{-78.7390136719, -78.7335205078, -130.166015625, -130.155029297}},
{"xj3kvwr2d", []float64{30.4006290436, 30.4006719589, 137.009553909, 137.009596825}},
{"2wtvb", []float64{-7.4267578125, -7.3828125, -149.4140625, -149.370117188}},
{"c5x5dhk", []float64{65.3260803223, 65.3274536133, -125.062866211, -125.06149292}},
{"eph56u6", []float64{39.9696350098, 39.9710083008, -39.2514038086, -39.2500305176}},
{"5dhyg0hckf", []float64{-77.5632512569, -77.5632458925, -15.6817495823, -15.6817388535}},
{"9vrb", []float64{29.53125, 29.70703125, -90.3515625, -90.0}},
{"b7nsys", []float64{62.7319335938, 62.7374267578, -159.323730469, -159.312744141}},
{"kmenbsk0", []float64{-12.8526306152, -12.8524589539, 15.4962158203, 15.4965591431}},
{"j35", []float64{-84.375, -82.96875, 60.46875, 61.875}},
{"e7sx", []float64{20.91796875, 21.09375, -27.421875, -27.0703125}},
{"h87mpg", []float64{-87.6983642578, -87.6928710938, 27.4108886719, 27.421875}},
{"qjbtp", []float64{-11.77734375, -11.7333984375, 91.0107421875, 91.0546875}},
{"4zqs2pndx", []float64{-48.4327983856, -48.4327554703, -47.100148201, -47.1001052856}},
{"1fsc5v3", []float64{-75.7328796387, -75.7315063477, -94.4041442871, -94.4027709961}},
{"kp6hptx", []float64{-3.48541259766, -3.48403930664, 3.15170288086, 3.15307617188}},
{"3n77", []float64{-9.31640625, -9.140625, -130.4296875, -130.078125}},
{"q347uc", []float64{-38.7103271484, -38.7048339844, 104.622802734, 104.633789062}},
{"n8gckvg3", []float64{-85.5297660828, -85.5295944214, 117.98664093, 117.986984253}},
{"p7szbr6ceq", []float64{-68.9100801945, -68.9100748301, 152.944589853, 152.944600582}},
{"8w7n", []float64{36.2109375, 36.38671875, -153.28125, -152.9296875}},
{"k4s3ndj8", []float64{-30.7507324219, -30.7505607605, 6.26976013184, 6.27010345459}},
{"fh38ev", []float64{69.0216064453, 69.0270996094, -87.7258300781, -87.71484375}},
{"rzebrsw", []float64{-2.74383544922, -2.7424621582, 174.36126709, 174.362640381}},
{"un", []float64{78.75, 84.375, 0.0, 11.25}},
{"27u3d4ybbkt", []float64{-23.6273190379, -23.6273176968, -162.676259726, -162.676258385}},
{"5hk2", []float64{-66.09375, -65.91796875, -39.0234375, -38.671875}},
{"62f6wqsbfc6n", []float64{-40.3059548512, -40.3059546836, -75.3046354651, -75.3046351299}},
{"r6jvqxczv", []float64{-32.7832460403, -32.783203125, 154.624199867, 154.624242783}},
{"wyg1es5yx", []float64{38.2555103302, 38.2555532455, 128.128008842, 128.128051758}},
{"smp9qbcpnt", []float64{28.3500748873, 28.3500802517, 22.0951581001, 22.095168829}},
{"4q46h", []float64{-55.8984375, -55.8544921875, -75.41015625, -75.3662109375}},
{"9u6v9g2ebcm5", []float64{24.8915505968, 24.8915507644, -97.3051826656, -97.3051823303}},
{"25mwbs3", []float64{-25.5088806152, -25.5075073242, -172.242279053, -172.240905762}},
{"kwf", []float64{-7.03125, -5.625, 25.3125, 26.71875}},
{"ekqgkknev", []float64{24.5001554489, 24.5001983643, -24.0619039536, -24.0618610382}},
{"y9974617cb12", []float64{53.9764738083, 53.9764739759, 114.358482845, 114.35848318}},
{"htuu1sxcpd", []float64{-56.9282233715, -56.9282180071, 29.2565703392, 29.256581068}},
{"150sv8q", []float64{-72.2886657715, -72.2872924805, -134.046936035, -134.045562744}},
{"j36p6wnmqm", []float64{-81.6604489088, -81.6604435444, 59.181214571, 59.1812252998}},
{"py", []float64{-56.25, -50.625, 168.75, 180.0}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"y87", []float64{46.40625, 47.8125, 116.71875, 118.125}},
{"6v90bwn999", []float64{-13.8974422216, -13.8974368572, -54.8127865791, -54.8127758503}},
{"8kyq", []float64{27.7734375, 27.94921875, -159.9609375, -159.609375}},
{"8cht765b92zg", []float64{6.55892824754, 6.55892841518, -139.773838855, -139.77383852}},
{"nu5jwk23v7f", []float64{-66.5095366538, -66.5095353127, 128.243979514, 128.243980855}},
{"0m10969", []float64{-61.7733764648, -61.7720031738, -167.287445068, -167.286071777}},
{"s29h49frdp", []float64{3.52656304836, 3.52656841278, 12.7692890167, 12.7692997456}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"brmmeg2z8r", []float64{86.7672246695, 86.7672300339, -161.201351881, -161.201341152}},
{"r3bngg619d", []float64{-33.9516055584, -33.951600194, 146.417605877, 146.417616606}},
{"rmz76wced8c", []float64{-12.0472772419, -12.0472759008, 156.557344347, 156.557345688}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"2h", []float64{-22.5, -16.875, -180.0, -168.75}},
{"ty2764x", []float64{35.7412719727, 35.7426452637, 79.1990661621, 79.2004394531}},
{"5yh3330r", []float64{-56.0235786438, -56.0234069824, -5.21816253662, -5.21781921387}},
{"9szz", []float64{27.94921875, 28.125, -101.6015625, -101.25}},
{"x7d41b", []float64{20.0390625, 20.0445556641, 149.139404297, 149.150390625}},
{"dw", []float64{33.75, 39.375, -67.5, -56.25}},
{"gnd4cw", []float64{82.0788574219, 82.0843505859, -42.1215820312, -42.1105957031}},
{"k9bxc2n8", []float64{-33.7939453125, -33.7937736511, 23.2669830322, 23.267326355}},
{"hump4nk", []float64{-64.8289489746, -64.8275756836, 40.8746337891, 40.8760070801}},
{"gkz", []float64{71.71875, 73.125, -23.90625, -22.5}},
{"g9e08yt", []float64{53.5610961914, 53.5624694824, -18.2414245605, -18.2400512695}},
{"3eyuyzpm", []float64{-23.0319786072, -23.0318069458, -102.701225281, -102.700881958}},
{"utpuc59p4m", []float64{73.9804154634, 73.9804208279, 33.443852663, 33.4438633919}},
{"cnqt", []float64{81.03515625, 81.2109375, -125.859375, -125.5078125}},
{"z05xfy72gk0", []float64{46.3967871666, 46.3967885077, 140.04732728, 140.047328621}},
{"skr4zd", []float64{24.4006347656, 24.4061279297, 21.4233398438, 21.4343261719}},
{"h2fe8mdnq", []float64{-85.1347303391, -85.1346874237, 14.7796154022, 14.7796583176}},
{"z18b", []float64{53.4375, 53.61328125, 136.0546875, 136.40625}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"vh", []float64{67.5, 73.125, 45.0, 56.25}},
{"v64zfspngxw", []float64{57.6354762912, 57.6354776323, 60.2368220687, 60.2368234098}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"d800", []float64{0.0, 0.17578125, -67.5, -67.1484375}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"bnugeje1", []float64{83.6143684387, 83.6145401001, -173.184356689, -173.184013367}},
{"46", []float64{-78.75, -73.125, -78.75, -67.5}},
{"8rsncsbz2mx4", []float64{43.4013903514, 43.401390519, -163.058031946, -163.058031611}},
{"t4w9wh7f98je", []float64{14.3499474786, 14.3499476463, 54.4095184654, 54.4095188007}},
{"nsr1tp2", []float64{-65.7902526855, -65.7888793945, 122.563476562, 122.564849854}},
{"9me", []float64{30.9375, 32.34375, -119.53125, -118.125}},
{"t8250bh2y", []float64{1.93372249603, 1.93376541138, 67.5390529633, 67.5390958786}},
{"1", []float64{-90.0, -45.0, -135.0, -90.0}},
{"uqty0nmvvj0c", []float64{82.652533818, 82.6525339857, 19.3440495059, 19.3440498412}},
{"7nkxt", []float64{-8.525390625, -8.4814453125, -38.4521484375, -38.408203125}},
{"jzev", []float64{-46.93359375, -46.7578125, 84.0234375, 84.375}},
{"dmtj1fktxe", []float64{31.8297261, 31.8297314644, -71.6353440285, -71.6353332996}},
{"0r", []float64{-50.625, -45.0, -168.75, -157.5}},
{"5hqv273y", []float64{-65.152015686, -65.1518440247, -35.4944229126, -35.4940795898}},
{"78k", []float64{-43.59375, -42.1875, -16.875, -15.46875}},
{"f2krt5", []float64{47.7410888672, 47.7465820312, -72.5537109375, -72.5427246094}},
{"ffw63hhzm2u", []float64{59.481229037, 59.4812303782, -47.4102383852, -47.4102370441}},
{"mv3z5k", []float64{-14.2163085938, -14.2108154297, 81.3537597656, 81.3647460938}},
{"f15", []float64{50.625, 52.03125, -85.78125, -84.375}},
{"j710re25dhzs", []float64{-73.0625749379, -73.0625747703, 57.9859357327, 57.985936068}},
{"rtt328k93", []float64{-13.8411855698, -13.8411426544, 164.911007881, 164.911050797}},
{"d2x003t048", []float64{2.82073974609, 2.82074511051, -68.8882899284, -68.8882791996}},
{"22uy0", []float64{-39.7265625, -39.6826171875, -162.0703125, -162.026367188}},
{"tuzxx", []float64{28.037109375, 28.0810546875, 89.6044921875, 89.6484375}},
{"su44fdqr2pv1", []float64{22.9970443435, 22.9970445111, 36.6809530556, 36.6809533909}},
{"yttq", []float64{76.9921875, 77.16796875, 119.8828125, 120.234375}},
{"9fu5vyr", []float64{16.1622619629, 16.1636352539, -95.362701416, -95.361328125}},
{"zwmzk37wsh97", []float64{81.4386709593, 81.438671127, 165.777684934, 165.77768527}},
{"hd777ygzewj", []float64{-76.7340624332, -76.7340610921, 27.2404141724, 27.2404155135}},
{"0b1", []float64{-90.0, -88.59375, -144.84375, -143.4375}},
{"ejmgd", []float64{30.146484375, 30.1904296875, -36.826171875, -36.7822265625}},
{"rzxh6", []float64{-2.0654296875, -2.021484375, 178.681640625, 178.725585938}},
{"0rf4f4u", []float64{-45.9077453613, -45.9063720703, -165.844116211, -165.84274292}},
{"1m23ggbv8", []float64{-60.1395893097, -60.1395463943, -123.23261261, -123.232569695}},
{"gdvt2y6wfv", []float64{61.4271193743, 61.4271247387, -14.7291147709, -14.7291040421}},
{"wxb3eqeug0y0", []float64{43.8939468563, 43.8939470239, 112.9996714, 112.999671735}},
{"516kngbm6", []float64{-82.2441244125, -82.2440814972, -41.5388774872, -41.5388345718}},
{"xtuwe2zu", []float64{33.4911346436, 33.4913063049, 163.981590271, 163.981933594}},
{"dhb1c2jrz6c1", []float64{27.027712483, 27.0277126506, -89.9375461042, -89.9375457689}},
{"23c397n4v8g", []float64{-34.8756225407, -34.8756211996, -166.928776056, -166.928774714}},
{"1w81bnmc3", []float64{-53.0953359604, -53.095293045, -112.492060661, -112.492017746}},
{"03hu77r", []float64{-83.6100769043, -83.6087036133, -161.917877197, -161.916503906}},
{"z2vrm", []float64{50.4931640625, 50.537109375, 153.852539062, 153.896484375}},
{"q630e", []float64{-32.255859375, -32.2119140625, 102.788085938, 102.83203125}},
{"h9uzt", []float64{-78.837890625, -78.7939453125, 29.3994140625, 29.443359375}},
{"x09c9jz", []float64{3.10775756836, 3.10913085938, 137.51449585, 137.515869141}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"xv9ee3gq2j", []float64{31.5634471178, 31.5634524822, 171.006660461, 171.00667119}},
{"6e8xjp4", []float64{-24.0435791016, -24.0422058105, -66.5744018555, -66.5730285645}},
{"0m8sj439bys", []float64{-58.3466801047, -58.3466787636, -167.82505095, -167.825049609}},
{"khf7yq8js6", []float64{-17.5854098797, -17.5854045153, 3.43890309334, 3.43891382217}},
{"hh", []float64{-67.5, -61.875, 0.0, 11.25}},
{"kcyx9b0rd65e", []float64{-33.8365919329, -33.8365917653, 42.967973873, 42.9679742083}},
{"qcy4pees", []float64{-34.7847747803, -34.7846031189, 132.521896362, 132.522239685}},
{"tc6", []float64{7.03125, 8.4375, 81.5625, 82.96875}},
{"mxhnk2fkh", []float64{-4.52156066895, -4.5215177536, 73.3150291443, 73.3150720596}},
{"1mggmg5x0k", []float64{-57.067258358, -57.0672529936, -118.219059706, -118.219048977}},
{"f1udt102z", []float64{55.2888250351, 55.2888679504, -83.4515047073, -83.451461792}},
{"jjz", []float64{-57.65625, -56.25, 54.84375, 56.25}},
{"1q1dg8peze2", []float64{-55.765940398, -55.7659390569, -121.476194859, -121.476193517}},
{"604unch", []float64{-44.2913818359, -44.2900085449, -85.8306884766, -85.8293151855}},
{"kt", []float64{-16.875, -11.25, 22.5, 33.75}},
{"wpbgc", []float64{44.2529296875, 44.296875, 91.0986328125, 91.142578125}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"tz2pky3c", []float64{42.0901679993, 42.0903396606, 78.9611434937, 78.9614868164}},
{"j96v3y", []float64{-82.0129394531, -82.0074462891, 71.4440917969, 71.455078125}},
{"vs974dv", []float64{70.8549499512, 70.8563232422, 69.3745422363, 69.3759155273}},
{"dunwchdt7d6", []float64{23.712155968, 23.7121573091, -47.061843574, -47.0618422329}},
{"h2u2nkksw", []float64{-85.7571315765, -85.7570886612, 17.5076580048, 17.5077009201}},
{"z8qtu1", []float64{47.4224853516, 47.4279785156, 166.81640625, 166.827392578}},
{"2677dsdngh", []float64{-31.7026162148, -31.7026108503, -164.066948891, -164.066938162}},
{"4yy", []float64{-52.03125, -50.625, -47.8125, -46.40625}},
{"uym", []float64{80.15625, 81.5625, 40.78125, 42.1875}},
{"m2mssd", []float64{-42.7917480469, -42.7862548828, 64.1821289062, 64.1931152344}},
{"xed3b", []float64{19.9951171875, 20.0390625, 160.6640625, 160.708007812}},
{"rfp4ky", []float64{-33.3215332031, -33.3160400391, 178.802490234, 178.813476562}},
{"83fmm9e", []float64{10.7748413086, 10.7762145996, -165.340118408, -165.338745117}},
{"tr7z2dqh", []float64{42.0687103271, 42.0688819885, 61.5536499023, 61.5539932251}},
{"crsh", []float64{87.890625, 88.06640625, -118.125, -117.7734375}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"761m6h", []float64{-32.8051757812, -32.7996826172, -31.904296875, -31.8933105469}},
{"7p", []float64{-5.625, 0.0, -45.0, -33.75}},
{"8qu9r5", []float64{38.2049560547, 38.2104492188, -162.114257812, -162.103271484}},
{"5z", []float64{-50.625, -45.0, -11.25, 0.0}},
{"kbz", []float64{-40.78125, -39.375, 43.59375, 45.0}},
{"zdftsw1rbpsf", []float64{61.4698768035, 61.4698769711, 161.21510189, 161.215102226}},
{"n1m105r", []float64{-82.7751159668, -82.7737426758, 97.0408630371, 97.0422363281}},
{"xf4kktnxsz", []float64{12.0258611441, 12.0258665085, 172.120946646, 172.120957375}},
{"kzqyq4m0qtyu", []float64{-3.10768313706, -3.10768296942, 43.5130138323, 43.5130141675}},
{"1j1vy57w7", []float64{-60.8453321457, -60.8452892303, -132.27045536, -132.270412445}},
{"nq4u3", []float64{-55.5029296875, -55.458984375, 105.161132812, 105.205078125}},
{"bhcy", []float64{72.7734375, 72.94921875, -177.5390625, -177.1875}},
{"vjr", []float64{74.53125, 75.9375, 54.84375, 56.25}},
{"uc99nrdsntv", []float64{53.6551974714, 53.6551988125, 36.1377520859, 36.137753427}},
{"3e8zmnz", []float64{-24.0010070801, -23.9996337891, -111.2159729, -111.214599609}},
{"j0yzdvs9", []float64{-84.4325065613, -84.4323348999, 54.6192169189, 54.6195602417}},
{"8chvm4", []float64{6.55883789062, 6.56433105469, -139.350585938, -139.339599609}},
{"ywf6099wx", []float64{83.329668045, 83.3297109604, 115.6883955, 115.688438416}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"zrc", []float64{88.59375, 90.0, 147.65625, 149.0625}},
{"zq", []float64{78.75, 84.375, 146.25, 157.5}},
{"7xznu50ru", []float64{-0.201916694641, -0.201873779297, -12.4799537659, -12.4799108505}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"7r", []float64{-5.625, 0.0, -33.75, -22.5}},
{"f27k", []float64{47.109375, 47.28515625, -74.1796875, -73.828125}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"b4s13188yv", []float64{59.2906218767, 59.2906272411, -174.330078363, -174.330067635}},
{"q8", []float64{-45.0, -39.375, 112.5, 123.75}},
{"skn7w", []float64{23.115234375, 23.1591796875, 20.302734375, 20.3466796875}},
{"1tzqwzyh5vp", []float64{-56.4703863859, -56.4703850448, -101.999646574, -101.999645233}},
{"r52cmn", []float64{-26.4660644531, -26.4605712891, 136.274414062, 136.285400391}},
{"7bwvbbbru", []float64{-41.1713075638, -41.1712646484, -1.72433853149, -1.72429561615}},
{"ruv0b1n", []float64{-18.1439208984, -18.1425476074, 175.789489746, 175.790863037}},
{"vwf01ujm", []float64{82.9915809631, 82.9917526245, 70.3966140747, 70.3969573975}},
{"0metgxjtrm91", []float64{-58.0123747699, -58.0123746023, -163.666450828, -163.666450493}},
{"2w", []float64{-11.25, -5.625, -157.5, -146.25}},
{"8kmch3", []float64{24.0875244141, 24.0930175781, -160.477294922, -160.466308594}},
{"g6m", []float64{57.65625, 59.0625, -26.71875, -25.3125}},
{"t6v4k2", []float64{15.8642578125, 15.8697509766, 63.4680175781, 63.4790039062}},
{"zr02vfju8hd", []float64{84.5186188817, 84.5186202228, 146.862147152, 146.862148494}},
{"kb8jn751", []float64{-41.2919425964, -41.2917709351, 34.0287780762, 34.0291213989}},
{"mj76", []float64{-15.1171875, -14.94140625, 49.5703125, 49.921875}},
{"f0hwcbkwjnr", []float64{46.1889602244, 46.1889615655, -83.5885669291, -83.588565588}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"6rt9btpxj2p1", []float64{-2.47621519491, -2.47621502727, -70.9831179678, -70.9831176326}},
{"wh0s6yb6j", []float64{23.2844924927, 23.284535408, 90.8245325089, 90.8245754242}},
{"65b", []float64{-23.90625, -22.5, -90.0, -88.59375}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"z0tpc5d", []float64{49.1940307617, 49.1954040527, 142.077941895, 142.079315186}},
{"fgs2w0", []float64{64.775390625, 64.7808837891, -50.009765625, -49.9987792969}},
{"vfus35qgp", []float64{61.2341880798, 61.2342309952, 85.1316404343, 85.1316833496}},
{"hywnkqdye", []float64{-52.3020458221, -52.3020029068, 42.3781728745, 42.3782157898}},
{"qpwxwun0b", []float64{-1.47203922272, -1.47199630737, 99.4454956055, 99.4455385208}},
{"v88u9rqr24", []float64{48.6445963383, 48.6446017027, 68.6182022095, 68.6182129383}},
{"17x06x4fyw", []float64{-70.2295982838, -70.2295929193, -113.792331219, -113.79232049}},
{"3ykkhjyhrt", []float64{-9.1082829237, -9.10827755928, -95.0890946388, -95.08908391}},
{"mbzkuqw5", []float64{-39.910068512, -39.9098968506, 89.1403198242, 89.140663147}},
{"yqjf0p3mn", []float64{79.1422462463, 79.1422891617, 109.337911606, 109.337954521}},
{"2f", []float64{-33.75, -28.125, -146.25, -135.0}},
{"hqsm3", []float64{-52.5146484375, -52.470703125, 17.2705078125, 17.314453125}},
{"gp1y4wtft1tp", []float64{85.4658314399, 85.4658316076, -42.4210815132, -42.4210811779}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"kxxruvh", []float64{-1.42272949219, -1.42135620117, 32.9095458984, 32.9109191895}},
{"08h0ft4vk97r", []float64{-89.839789141, -89.8397889733, -151.761162691, -151.761162356}},
{"y9u8", []float64{54.84375, 55.01953125, 118.828125, 119.1796875}},
{"3zhsk", []float64{-4.8779296875, -4.833984375, -94.74609375, -94.7021484375}},
{"x2", []float64{0.0, 5.625, 146.25, 157.5}},
{"mr7nnvr", []float64{-3.13522338867, -3.13385009766, 60.7749938965, 60.7763671875}},
{"d3g0pn54hr", []float64{9.87708985806, 9.87709522247, -74.2193305492, -74.2193198204}},
{"jv173u", []float64{-61.2817382812, -61.2762451172, 80.5847167969, 80.595703125}},
{"vte", []float64{75.9375, 77.34375, 71.71875, 73.125}},
{"303un61j2s", []float64{-42.878715992, -42.8787106276, -132.263009548, -132.262998819}},
{"m528h", []float64{-26.71875, -26.6748046875, 45.87890625, 45.9228515625}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"p8h", []float64{-90.0, -88.59375, 163.125, 164.53125}},
{"tzusgm6v33y", []float64{44.4584606588, 44.4584619999, 85.2247855067, 85.2247868478}},
{"26sre", []float64{-29.619140625, -29.5751953125, -162.641601562, -162.59765625}},
{"q1tcnkmh55b", []float64{-36.3626660407, -36.3626646996, 98.3675909042, 98.3675922453}},
{"xs4t1g3", []float64{23.3967590332, 23.3981323242, 161.093902588, 161.095275879}},
{"td", []float64{11.25, 16.875, 67.5, 78.75}},
{"xdvwxsmsjd", []float64{16.6353714466, 16.635376811, 165.571753979, 165.571764708}},
{"dnw", []float64{36.5625, 37.96875, -81.5625, -80.15625}},
{"u7nu2ff43vx", []float64{62.6375922561, 62.6375935972, 20.777977556, 20.7779788971}},
{"0jy2", []float64{-57.65625, -57.48046875, -171.2109375, -170.859375}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"2yx2k", []float64{-8.3935546875, -8.349609375, -135.87890625, -135.834960938}},
{"g3", []float64{50.625, 56.25, -33.75, -22.5}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"vrrpw34jfjs", []float64{87.1061190963, 87.1061204374, 66.3712459803, 66.3712473214}},
{"g2gkzeutg", []float64{50.0752973557, 50.075340271, -28.8437891006, -28.8437461853}},
{"z8ett4zh", []float64{48.7950897217, 48.7952613831, 162.6512146, 162.651557922}},
{"cyspv2k0", []float64{82.9261779785, 82.9263496399, -95.3887939453, -95.3884506226}},
{"fqu7qbshmr", []float64{83.5435527563, 83.5435581207, -72.471088171, -72.4710774422}},
{"578u4ww7", []float64{-69.5731544495, -69.5729827881, -32.5768661499, -32.5765228271}},
{"4gzkwxrzb", []float64{-68.0740785599, -68.0740356445, -45.7583999634, -45.758357048}},
{"g0z038npm1ym", []float64{49.2639500834, 49.263950251, -35.0818693265, -35.0818689913}},
{"vrse4ey62b1y", []float64{87.7358303592, 87.7358305268, 62.6966058835, 62.6966062188}},
{"7x2fkbbj6", []float64{-3.81822109222, -3.81817817688, -21.2364864349, -21.2364435196}},
{"tuvfdxy7b", []float64{27.2014188766, 27.201461792, 86.9543838501, 86.9544267654}},
{"9qzggb", []float64{38.6279296875, 38.6334228516, -112.686767578, -112.67578125}},
{"pupf6", []float64{-67.1044921875, -67.060546875, 179.736328125, 179.780273438}},
{"fknyrbe41k0w", []float64{68.6017451808, 68.6017453484, -68.9130621403, -68.9130618051}},
{"591vk5jz7x4v", []float64{-83.4343860112, -83.4343858436, -19.8552309349, -19.8552305996}},
{"n4psdv", []float64{-77.9315185547, -77.9260253906, 100.667724609, 100.678710938}},
{"zyw2", []float64{81.5625, 81.73828125, 177.5390625, 177.890625}},
{"r0e29", []float64{-42.099609375, -42.0556640625, 139.614257812, 139.658203125}},
{"j9cq7f", []float64{-79.0466308594, -79.0411376953, 69.4226074219, 69.43359375}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"m42x20h", []float64{-31.0693359375, -31.0679626465, 45.7086181641, 45.7099914551}},
{"405zg3kz2j", []float64{-88.6295574903, -88.6295521259, -84.5772171021, -84.5772063732}},
{"7vu7httsdm", []float64{-12.0978945494, -12.097889185, -5.06803393364, -5.0680232048}},
{"prznz91", []float64{-45.2142333984, -45.2128601074, 156.424713135, 156.426086426}},
{"46dupvqwbsuj", []float64{-75.2043508552, -75.2043506876, -74.5332831144, -74.5332827792}},
{"7s0", []float64{-22.5, -21.09375, -22.5, -21.09375}},
{"5nhz5er2nnm5", []float64{-55.0016444363, -55.0016442686, -38.1562833488, -38.1562830135}},
{"5qqp", []float64{-53.61328125, -53.4375, -25.3125, -24.9609375}},
{"t4dqn2", []float64{15.1171875, 15.1226806641, 48.4387207031, 48.4497070312}},
{"gmxt", []float64{76.81640625, 76.9921875, -23.203125, -22.8515625}},
{"syuh0ke1syh", []float64{38.6968839169, 38.696885258, 39.3903154135, 39.3903167546}},
{"nvr0", []float64{-60.46875, -60.29296875, 133.59375, 133.9453125}},
{"g4", []float64{56.25, 61.875, -45.0, -33.75}},
{"gnd2kb6w0s32", []float64{81.6088713706, 81.6088715382, -41.623740904, -41.6237405688}},
{"x7hptvsgdvu", []float64{18.2242034376, 18.2242047787, 152.134332061, 152.134333402}},
{"2fc8", []float64{-29.53125, -29.35546875, -144.140625, -143.7890625}},
{"e3fsu48yte7", []float64{10.693577081, 10.6935784221, -30.057323724, -30.0573223829}},
{"pbqhjvqh", []float64{-87.8610992432, -87.8609275818, 177.448425293, 177.448768616}},
{"sqx1nwgn", []float64{36.7763900757, 36.7765617371, 21.3835144043, 21.3838577271}},
{"e6g", []float64{15.46875, 16.875, -29.53125, -28.125}},
{"dt", []float64{28.125, 33.75, -67.5, -56.25}},
{"02s64yzk", []float64{-86.7981719971, -86.7980003357, -162.642631531, -162.642288208}},
{"z9", []float64{50.625, 56.25, 157.5, 168.75}},
{"fjv", []float64{77.34375, 78.75, -82.96875, -81.5625}},
{"5yc0", []float64{-52.03125, -51.85546875, -9.84375, -9.4921875}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"k3x3j1pmdnfp", []float64{-36.3802440651, -36.3802438974, 21.6750839353, 21.6750842705}},
{"g2pjds", []float64{45.9887695312, 45.9942626953, -23.7963867188, -23.7854003906}},
{"ppw0p3q2gzbk", []float64{-47.8054625541, -47.8054623865, 143.764847852, 143.764848188}},
{"9xwp0kprs0x6", []float64{43.4412318841, 43.4412320517, -104.041375928, -104.041375592}},
{"0gzww7fv7gj", []float64{-67.7421551943, -67.7421538532, -135.424522609, -135.424521267}},
{"sgpt2", []float64{17.7978515625, 17.841796875, 44.296875, 44.3408203125}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"rr", []float64{-5.625, 0.0, 146.25, 157.5}},
{"wkqrtvj5", []float64{25.2525901794, 25.2527618408, 110.298614502, 110.298957825}},
{"ngtfyx77u", []float64{-69.7886323929, -69.7885894775, 132.126216888, 132.126259804}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"58y", []float64{-85.78125, -84.375, -14.0625, -12.65625}},
{"mvjrxvx", []float64{-15.5264282227, -15.5250549316, 86.483001709, 86.484375}},
{"jgve4ttu", []float64{-68.3480072021, -68.3478355408, 86.6021347046, 86.6024780273}},
{"h9", []float64{-84.375, -78.75, 22.5, 33.75}},
{"ytf9sb6mj27", []float64{77.609654814, 77.6096561551, 116.227684468, 116.227685809}},
{"rk9kv3q", []float64{-18.8456726074, -18.8442993164, 148.246765137, 148.248138428}},
{"kbq8t", []float64{-43.505859375, -43.4619140625, 43.1103515625, 43.154296875}},
{"j8prqp8kx", []float64{-88.6836147308, -88.6835718155, 77.9596281052, 77.9596710205}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"mq53k", []float64{-11.0302734375, -10.986328125, 60.99609375, 61.0400390625}},
{"d95fw", []float64{6.064453125, 6.1083984375, -61.962890625, -61.9189453125}},
{"x15j3", []float64{6.5478515625, 6.591796875, 139.262695312, 139.306640625}},
{"x0yg7k2b", []float64{4.81338500977, 4.81355667114, 144.636039734, 144.636383057}},
{"dx8x9yxwprk5", []float64{43.5426343046, 43.5426344723, -66.7093545198, -66.7093541846}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"429ptwcscc3", []float64{-85.8312396705, -85.8312383294, -77.0999144018, -77.0999130607}},
{"ncknjt", []float64{-81.8865966797, -81.8811035156, 129.616699219, 129.627685547}},
{"7ce8p70yc7h", []float64{-36.5448457003, -36.5448443592, -6.00843250751, -6.00843116641}},
{"un1", []float64{78.75, 80.15625, 1.40625, 2.8125}},
{"b6rv0shhd5", []float64{58.5579174757, 58.5579228401, -157.824010849, -157.82400012}},
{"qyg8d", []float64{-6.943359375, -6.8994140625, 128.759765625, 128.803710938}},
{"3p7f2mvx6", []float64{-3.79041194916, -3.79036903381, -129.707937241, -129.707894325}},
{"u0vj", []float64{50.09765625, 50.2734375, 7.03125, 7.3828125}},
{"49m", []float64{-82.96875, -81.5625, -60.46875, -59.0625}},
{"7vhu8hp00j2", []float64{-16.0619835556, -16.0619822145, -4.56069946289, -4.56069812179}},
{"3sh", []float64{-22.5, -21.09375, -106.875, -105.46875}},
{"sexg40hc", []float64{20.2150154114, 20.2151870728, 33.4928512573, 33.4931945801}},
{"4uwxw8u", []float64{-63.365020752, -63.3636474609, -46.8182373047, -46.8168640137}},
{"mr", []float64{-5.625, 0.0, 56.25, 67.5}},
{"1kymkgft79d3", []float64{-62.3368896358, -62.3368894681, -114.748610817, -114.748610482}},
{"3tj", []float64{-16.875, -15.46875, -105.46875, -104.0625}},
{"vxfzth5", []float64{89.9340820312, 89.9354553223, 71.5910339355, 71.5924072266}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"e5r9", []float64{18.45703125, 18.6328125, -34.453125, -34.1015625}},
{"hq9bc8pmfk", []float64{-53.3046555519, -53.3046501875, 13.7869083881, 13.786919117}},
{"05t", []float64{-70.3125, -68.90625, -172.96875, -171.5625}},
{"ye6yg", []float64{64.4677734375, 64.51171875, 116.499023438, 116.54296875}},
{"gg4k", []float64{62.578125, 62.75390625, -8.0859375, -7.734375}},
{"50q2fcp", []float64{-88.4564208984, -88.4550476074, -36.0804748535, -36.0791015625}},
{"2hu8d3131", []float64{-18.1876945496, -18.1876516342, -173.571238518, -173.571195602}},
{"08gcdmy84ewg", []float64{-85.4859731533, -85.4859729856, -152.118642814, -152.118642479}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"t05v4wzmqmet", []float64{0.91691667214, 0.916916839778, 50.3935300559, 50.3935303912}},
{"3nc", []float64{-7.03125, -5.625, -133.59375, -132.1875}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"3gvk6zm1869", []float64{-23.1190833449, -23.1190820038, -93.7394593656, -93.7394580245}},
{"39rruq", []float64{-36.5734863281, -36.5679931641, -102.117919922, -102.106933594}},
{"jhkxvqt7q5z", []float64{-64.6951617301, -64.6951603889, 51.5663145483, 51.5663158894}},
{"bb", []float64{45.0, 50.625, -146.25, -135.0}},
{"es26styfpb", []float64{24.3776321411, 24.3776375055, -21.9410812855, -21.9410705566}},
{"500q98r9", []float64{-88.8558769226, -88.8557052612, -44.5722198486, -44.5718765259}},
{"kv", []float64{-16.875, -11.25, 33.75, 45.0}},
{"njf", []float64{-57.65625, -56.25, 92.8125, 94.21875}},
{"whk5vu", []float64{24.5874023438, 24.5928955078, 95.8776855469, 95.888671875}},
{"w9pq4yk4p4qf", []float64{6.71437550336, 6.714375671, 122.821964733, 122.821965069}},
{"yt", []float64{73.125, 78.75, 112.5, 123.75}},
{"ccztdek", []float64{55.8283996582, 55.8297729492, -90.5877685547, -90.5863952637}},
{"ej33gfth2", []float64{29.8533296585, 29.8533725739, -43.070526123, -43.0704832077}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"hqyfjm", []float64{-51.6522216797, -51.6467285156, 20.9729003906, 20.9838867188}},
{"njcr", []float64{-56.42578125, -56.25, 91.7578125, 92.109375}},
{"48ejbxwrk6", []float64{-86.1343038082, -86.1342984438, -63.2505118847, -63.2505011559}},
{"78f1r09e5v8", []float64{-40.558232367, -40.5582310259, -19.3776619434, -19.3776606023}},
{"ged8fvuhk31", []float64{64.8516565561, 64.8516578972, -18.8578484952, -18.8578471541}},
{"8ss3fn", []float64{25.6530761719, 25.6585693359, -151.435546875, -151.424560547}},
{"v90sp4e6", []float64{51.3422012329, 51.3423728943, 68.5152053833, 68.5155487061}},
{"bx00h848", []float64{84.375, 84.3751716614, -157.298812866, -157.298469543}},
{"9y3", []float64{35.15625, 36.5625, -99.84375, -98.4375}},
{"ehpkg7", []float64{23.3514404297, 23.3569335938, -34.6618652344, -34.6508789062}},
{"r38623wxhs", []float64{-36.1575293541, -36.1575239897, 146.621668339, 146.621679068}},
{"x6yex2zx", []float64{16.0893058777, 16.0894775391, 155.719528198, 155.719871521}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"6w", []float64{-11.25, -5.625, -67.5, -56.25}},
{"mx6g2d", []float64{-3.63647460938, -3.63098144531, 71.3891601562, 71.4001464844}},
{"vmsh9b6f", []float64{76.7302322388, 76.7304039001, 61.9556808472, 61.9560241699}},
{"7uqbsm728bjw", []float64{-20.9769334272, -20.9769332595, -1.56654216349, -1.56654182822}},
{"nvtqqh0jc3", []float64{-57.9409021139, -57.9408967495, 131.396538019, 131.396548748}},
{"x8", []float64{0.0, 5.625, 157.5, 168.75}},
{"nqx5n", []float64{-52.91015625, -52.8662109375, 111.357421875, 111.401367188}},
{"c4wmv60xtud", []float64{60.0855401158, 60.0855414569, -125.979288518, -125.979287177}},
{"t9", []float64{5.625, 11.25, 67.5, 78.75}},
{"e3nqrsk9fqdu", []float64{6.74731470644, 6.74731487408, -24.6250675991, -24.6250672638}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"7x9", []float64{-2.8125, -1.40625, -21.09375, -19.6875}},
{"760qb6yxf", []float64{-32.5470399857, -32.5469970703, -33.3784389496, -33.3783960342}},
{"x7skftff", []float64{20.5543899536, 20.554561615, 152.340202332, 152.340545654}},
{"jv774u", []float64{-59.9194335938, -59.9139404297, 83.4411621094, 83.4521484375}},
{"phh7", []float64{-66.97265625, -66.796875, 140.9765625, 141.328125}},
{"hv4tt0pymy", []float64{-60.9070980549, -60.9070926905, 37.4962413311, 37.4962520599}},
{"3zemdehyubj", []float64{-1.82806491852, -1.82806357741, -96.563090533, -96.5630891919}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"dep80ett", []float64{16.8950843811, 16.8952560425, -56.9235992432, -56.9232559204}},
{"bbj", []float64{45.0, 46.40625, -139.21875, -137.8125}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"9k1t0ecqh3v3", []float64{23.4005451389, 23.4005453065, -121.616746299, -121.616745964}},
{"mm5", []float64{-16.875, -15.46875, 60.46875, 61.875}},
{"jz4zmmv2", []float64{-49.3190002441, -49.3188285828, 82.8551101685, 82.8554534912}},
{"bpshcry", []float64{88.065032959, 88.06640625, -174.311828613, -174.310455322}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"e87b", []float64{1.40625, 1.58203125, -17.2265625, -16.875}},
{"pcphekd1ph0", []float64{-83.5590720177, -83.5590706766, 178.739619255, 178.739620596}},
{"nrwq0", []float64{-46.7578125, -46.7138671875, 110.0390625, 110.083007812}},
{"6k3e7rk823cj", []float64{-20.4825823568, -20.4825821891, -76.4916108549, -76.4916105196}},
{"kkv059k5v", []float64{-18.2737398148, -18.2736968994, 18.4407663345, 18.4408092499}},
{"ep7x2t4t6x", []float64{42.084068656, 42.0840740204, -40.0526118279, -40.052601099}},
{"43hh0", []float64{-83.671875, -83.6279296875, -73.125, -73.0810546875}},
{"rdhfdg0qee", []float64{-33.2929354906, -33.2929301262, 164.301030636, 164.301041365}},
{"znku45j7p", []float64{80.8763694763, 80.8764123917, 141.77508831, 141.775131226}},
{"ju", []float64{-67.5, -61.875, 78.75, 90.0}},
{"b6zckuz", []float64{60.7145690918, 60.7159423828, -157.633209229, -157.631835938}},
{"fm7m", []float64{75.41015625, 75.5859375, -74.1796875, -73.828125}},
{"8xg", []float64{43.59375, 45.0, -153.28125, -151.875}},
{"wfk7q", []float64{13.2275390625, 13.271484375, 129.990234375, 130.034179688}},
{"6r1p9yz6z9", []float64{-4.26908433437, -4.26907896996, -77.2565674782, -77.2565567493}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"fuzw5", []float64{72.7734375, 72.8173828125, -45.5712890625, -45.52734375}},
{"m33ehjv", []float64{-37.4098205566, -37.4084472656, 58.5420227051, 58.5433959961}},
{"s6stp", []float64{14.94140625, 14.9853515625, 17.8857421875, 17.9296875}},
{"tjxuvxh", []float64{31.8109130859, 31.812286377, 56.1456298828, 56.1470031738}},
{"0vgxezccsf", []float64{-56.2950503826, -56.2950450182, -141.160722971, -141.160712242}},
{"0h", []float64{-67.5, -61.875, -180.0, -168.75}},
{"ge6bb", []float64{63.4130859375, 63.45703125, -18.6328125, -18.5888671875}},
{"h2gyy9", []float64{-84.5892333984, -84.5837402344, 16.8090820312, 16.8200683594}},
{"g0f7xg", []float64{49.8504638672, 49.8559570312, -41.4953613281, -41.484375}},
{"ujfk5", []float64{78.046875, 78.0908203125, 3.2958984375, 3.33984375}},
{"q0b6rwm0", []float64{-40.3514099121, -40.3512382507, 90.6880187988, 90.6883621216}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"0nyhwsx2g5", []float64{-51.2153702974, -51.215364933, -171.266770363, -171.266759634}},
{"mjpe", []float64{-16.34765625, -16.171875, 55.546875, 55.8984375}},
{"mt", []float64{-16.875, -11.25, 67.5, 78.75}},
{"z49vj0d4zz", []float64{59.9446624517, 59.9446678162, 137.683743238, 137.683753967}},
{"zry97vd3gs", []float64{88.8440108299, 88.8440161943, 155.55866003, 155.558670759}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"v3syrvc9", []float64{54.5678901672, 54.5680618286, 63.2723236084, 63.2726669312}},
{"nrer", []float64{-46.58203125, -46.40625, 105.8203125, 106.171875}},
{"2hqt8nf65v5e", []float64{-20.0895036198, -20.0895034522, -170.856119469, -170.856119134}},
{"wekgdxc", []float64{18.9390563965, 18.9404296875, 119.290924072, 119.292297363}},
{"6bh43q", []float64{-44.5715332031, -44.5660400391, -50.5700683594, -50.5590820312}},
{"d564", []float64{18.6328125, 18.80859375, -87.1875, -86.8359375}},
{"m85", []float64{-45.0, -43.59375, 71.71875, 73.125}},
{"x4tj5rb77r", []float64{14.9845737219, 14.9845790863, 142.174555063, 142.174565792}},
{"0bwycz8vr", []float64{-85.9588766098, -85.9588336945, -136.679577827, -136.679534912}},
{"ehnu2e", []float64{23.2635498047, 23.2690429688, -35.4858398438, -35.4748535156}},
{"jbe8mk", []float64{-87.1215820312, -87.1160888672, 83.9025878906, 83.9135742188}},
{"ss", []float64{22.5, 28.125, 22.5, 33.75}},
{"8edywpv3ygj3", []float64{20.8729668148, 20.8729669824, -153.361634128, -153.361633793}},
{"xpxu1erq390", []float64{42.9095560312, 42.9095573723, 145.974376202, 145.974377543}},
{"4043qb91kj", []float64{-89.7772854567, -89.7772800922, -86.5377616882, -86.5377509594}},
{"2n7q0j0r", []float64{-8.76039505005, -8.76022338867, -175.429344177, -175.429000854}},
{"cm0n96q3vn8t", []float64{74.2802738585, 74.2802740261, -123.686270043, -123.686269708}},
{"50hgpxg", []float64{-89.4300842285, -89.4287109375, -37.9866027832, -37.9852294922}},
{"2m", []float64{-16.875, -11.25, -168.75, -157.5}},
{"w4s1zj8n9", []float64{14.4014453888, 14.4014883041, 95.9326601028, 95.9327030182}},
{"hxzh", []float64{-45.703125, -45.52734375, 32.34375, 32.6953125}},
{"cn", []float64{78.75, 84.375, -135.0, -123.75}},
{"dpt5k8", []float64{42.7587890625, 42.7642822266, -82.7709960938, -82.7600097656}},
{"gz", []float64{84.375, 90.0, -11.25, 0.0}},
{"d09knt", []float64{3.54309082031, 3.54858398438, -87.9565429688, -87.9455566406}},
{"mrucw", []float64{-1.142578125, -1.0986328125, 63.193359375, 63.2373046875}},
{"055egqf4y", []float64{-72.4282693863, -72.4282264709, -174.93229866, -174.932255745}},
{"qphu", []float64{-4.921875, -4.74609375, 96.6796875, 97.03125}},
{"dfeh", []float64{14.765625, 14.94140625, -52.03125, -51.6796875}},
{"00qfn4ctp57", []float64{-88.2262055576, -88.2262042165, -170.241776258, -170.241774917}},
{"q9xx27p2trn", []float64{-35.2714830637, -35.2714817226, 123.06805104, 123.068052381}},
{"10bhfgfsj8m", []float64{-84.9250017107, -84.9250003695, -134.875474423, -134.875473082}},
{"6k", []float64{-22.5, -16.875, -78.75, -67.5}},
{"zyvjr4m37", []float64{83.9041757584, 83.9042186737, 176.096205711, 176.096248627}},
{"0c8k7gb", []float64{-80.7948303223, -80.7934570312, -145.733642578, -145.732269287}},
{"n7k", []float64{-71.71875, -70.3125, 106.875, 108.28125}},
{"fpj4eecz", []float64{84.8362541199, 84.8364257812, -82.812538147, -82.8121948242}},
{"ev", []float64{28.125, 33.75, -11.25, 0.0}},
{"8y", []float64{33.75, 39.375, -146.25, -135.0}},
{"x9xd1q", []float64{8.82202148438, 8.82751464844, 168.101806641, 168.112792969}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"d1pmefm0t", []float64{6.60424232483, 6.60428524017, -79.6328115463, -79.632768631}},
{"tmy", []float64{32.34375, 33.75, 64.6875, 66.09375}},
{"phu6jzv0z", []float64{-62.8869867325, -62.8869438171, 141.236414909, 141.236457825}},
{"zv7pjxpuwjbq", []float64{75.8009752259, 75.8009753935, 173.221350051, 173.221350387}},
{"9gegu1", []float64{20.3521728516, 20.3576660156, -95.80078125, -95.7897949219}},
{"tq33", []float64{35.33203125, 35.5078125, 58.0078125, 58.359375}},
{"e", []float64{0.0, 45.0, -45.0, 0.0}},
{"y6z1sy", []float64{60.7653808594, 60.7708740234, 111.302490234, 111.313476562}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"q", []float64{-45.0, 0.0, 90.0, 135.0}},
{"mtyyw7", []float64{-11.4971923828, -11.4916992188, 77.2668457031, 77.2778320312}},
{"2sqmb", []float64{-20.0830078125, -20.0390625, -148.7109375, -148.666992188}},
{"yp487k", []float64{84.4409179688, 84.4464111328, 93.6584472656, 93.6694335938}},
{"nyn5k1fc", []float64{-55.668926239, -55.6687545776, 132.3670578, 132.367401123}},
{"gnqk77v", []float64{80.9239196777, 80.9252929688, -36.0612487793, -36.0598754883}},
{"yw12", []float64{78.75, 78.92578125, 114.2578125, 114.609375}},
{"vcgqqmd", []float64{55.9725952148, 55.9739685059, 83.5977172852, 83.5990905762}},
{"27zp3q", []float64{-22.5988769531, -22.5933837891, -158.851318359, -158.840332031}},
{"cg4tb6", []float64{62.8967285156, 62.9022216797, -97.7233886719, -97.7124023438}},
{"w5njb9rnp5e", []float64{17.8936573863, 17.8936587274, 98.4693901241, 98.4693914652}},
{"9y6rp4pptv", []float64{36.3990193605, 36.399024725, -97.7684605122, -97.7684497833}},
{"pjup17j08hup", []float64{-56.4091892727, -56.409189105, 140.68680346, 140.686803795}},
{"vz52f33z9mu", []float64{84.5150206983, 84.5150220394, 83.421651721, 83.4216530621}},
{"zd", []float64{56.25, 61.875, 157.5, 168.75}},
{"q", []float64{-45.0, 0.0, 90.0, 135.0}},
{"dm46s", []float64{28.564453125, 28.6083984375, -75.41015625, -75.3662109375}},
{"d7dnvbu", []float64{20.8781433105, 20.8795166016, -75.6793212891, -75.677947998}},
{"02ercsvx0h", []float64{-85.7978796959, -85.7978743315, -164.106216431, -164.106205702}},
{"hnfx5f", []float64{-50.7897949219, -50.7843017578, 3.68041992188, 3.69140625}},
{"evb677f6t", []float64{32.7602863312, 32.7603292465, -10.7523107529, -10.7522678375}},
{"sg43r7p151", []float64{17.1113830805, 17.1113884449, 37.2424077988, 37.2424185276}},
{"441mdz5", []float64{-77.7447509766, -77.7433776855, -88.1172180176, -88.1158447266}},
{"k2qrsc7hr81", []float64{-42.2677946091, -42.267793268, 20.2522458136, 20.2522471547}},
{"d0ydzxhjwr", []float64{4.74158227444, 4.74158763885, -80.5240237713, -80.5240130424}},
{"4cn", []float64{-84.375, -82.96875, -47.8125, -46.40625}},
{"ds2", []float64{23.90625, 25.3125, -67.5, -66.09375}},
{"rxvywy", []float64{-0.230712890625, -0.225219726562, 165.882568359, 165.893554688}},
{"zs2k", []float64{69.609375, 69.78515625, 157.8515625, 158.203125}},
{"kg63", []float64{-26.54296875, -26.3671875, 36.9140625, 37.265625}},
{"hmxh64j", []float64{-58.3044433594, -58.3030700684, 21.1885070801, 21.1898803711}},
{"d5v3", []float64{21.26953125, 21.4453125, -82.6171875, -82.265625}},
{"ddg1", []float64{15.64453125, 15.8203125, -63.28125, -62.9296875}},
{"stf4tug", []float64{32.8092956543, 32.8106689453, 25.5693054199, 25.5706787109}},
{"vgc", []float64{66.09375, 67.5, 80.15625, 81.5625}},
{"jby3xf", []float64{-85.5065917969, -85.5010986328, 87.8796386719, 87.890625}},
{"9b1f419tdjf", []float64{0.360777229071, 0.360778570175, -98.6990234256, -98.6990220845}},
{"0zqp4", []float64{-47.98828125, -47.9443359375, -137.724609375, -137.680664062}},
{"gg292c4wd", []float64{63.5075855255, 63.5076284409, -10.5103969574, -10.5103540421}},
{"zy96qbt", []float64{81.9607543945, 81.9621276855, 170.811309814, 170.812683105}},
{"tz9x7f5c70", []float64{43.4731149673, 43.4731203318, 81.0294485092, 81.0294592381}},
{"rq", []float64{-11.25, -5.625, 146.25, 157.5}},
{"94tt", []float64{14.94140625, 15.1171875, -127.265625, -126.9140625}},
{"h7vnwf8", []float64{-67.7499389648, -67.7485656738, 18.5778808594, 18.5792541504}},
{"4f", []float64{-78.75, -73.125, -56.25, -45.0}},
{"kj3t", []float64{-14.58984375, -14.4140625, 2.109375, 2.4609375}},
{"qspj", []float64{-21.62109375, -21.4453125, 122.34375, 122.6953125}},
{"4y9", []float64{-53.4375, -52.03125, -54.84375, -53.4375}},
{"b05kqcvsm8", []float64{45.7574129105, 45.7574182749, -175.125267506, -175.125256777}},
{"p5zq4f", []float64{-67.8405761719, -67.8350830078, 145.316162109, 145.327148438}},
{"1cgx", []float64{-78.92578125, -78.75, -96.328125, -95.9765625}},
{"m2", []float64{-45.0, -39.375, 56.25, 67.5}},
{"j150xkd492", []float64{-84.2619609833, -84.2619556189, 49.5401537418, 49.5401644707}},
{"05rs", []float64{-71.015625, -70.83984375, -169.453125, -169.1015625}},
{"ve8", []float64{64.6875, 66.09375, 67.5, 68.90625}},
{"r9tv", []float64{-35.68359375, -35.5078125, 165.5859375, 165.9375}},
{"d71r07", []float64{18.1219482422, 18.1274414062, -76.9812011719, -76.9702148438}},
{"b6hepfqk6", []float64{56.79043293, 56.7904758453, -162.072629929, -162.072587013}},
{"md8y86mqjd", []float64{-29.7815215588, -29.7815161943, 68.5731196404, 68.5731303692}},
{"bcgcyq", []float64{55.1843261719, 55.1898193359, -140.701904297, -140.690917969}},
{"e3hpu9352", []float64{6.99472904205, 6.9947719574, -27.9258728027, -27.9258298874}},
{"hsbsq2rkmg", []float64{-62.5320607424, -62.532055378, 23.4879863262, 23.4879970551}},
{"ub2r", []float64{47.63671875, 47.8125, 34.1015625, 34.453125}},
{"d8", []float64{0.0, 5.625, -67.5, -56.25}},
{"gexm9j6y088", []float64{65.6841686368, 65.6841699779, -12.2569441795, -12.2569428384}},
{"15cdq", []float64{-68.5107421875, -68.466796875, -132.626953125, -132.583007812}},
{"9zud17gy", []float64{43.9669418335, 43.9671134949, -94.8617935181, -94.8614501953}},
{"4q3y", []float64{-53.7890625, -53.61328125, -76.2890625, -75.9375}},
{"gph138", []float64{84.5947265625, 84.6002197266, -39.3090820312, -39.2980957031}},
{"m09d8ju2b", []float64{-41.7163324356, -41.7162895203, 47.1152114868, 47.1152544022}},
{"8mszup4gk", []float64{32.3388147354, 32.3388576508, -161.890583038, -161.890540123}},
{"dyrfvtvqh", []float64{35.6722640991, 35.6723070145, -45.102481842, -45.1024389267}},
{"3h9tgkp", []float64{-18.6547851562, -18.6534118652, -132.738189697, -132.736816406}},
{"66gdty14u", []float64{-29.0583658218, -29.0583229065, -73.5738945007, -73.5738515854}},
{"83zp1d", []float64{11.0852050781, 11.0906982422, -158.840332031, -158.829345703}},
{"e7gp0", []float64{22.32421875, 22.3681640625, -29.53125, -29.4873046875}},
{"s0ykfgceytk", []float64{5.07498219609, 5.0749835372, 8.91225636005, 8.91225770116}},
{"zfe7", []float64{59.58984375, 59.765625, 173.3203125, 173.671875}},
{"cr9", []float64{87.1875, 88.59375, -122.34375, -120.9375}},
{"9ugr3kq", []float64{28.0165100098, 28.0178833008, -96.6165161133, -96.6151428223}},
{"2grcq0", []float64{-26.4990234375, -26.4935302734, -135.087890625, -135.076904297}},
{"50bb31vkds3p", []float64{-85.726895202, -85.7268950343, -43.8940487802, -43.8940484449}},
{"qhxdv1hcqe3", []float64{-19.1983763874, -19.1983750463, 100.773404986, 100.773406327}},
{"k2cv0gp2vk", []float64{-39.8857140541, -39.8857086897, 13.7540781498, 13.7540888786}},
{"y5jd", []float64{62.2265625, 62.40234375, 97.734375, 98.0859375}},
{"gnvg3p", []float64{83.5784912109, 83.583984375, -36.8701171875, -36.8591308594}},
{"9g70w", []float64{18.369140625, 18.4130859375, -96.767578125, -96.7236328125}},
{"9g7qsb", []float64{19.423828125, 19.4293212891, -96.4709472656, -96.4599609375}},
{"1zq2u", []float64{-49.0869140625, -49.04296875, -92.28515625, -92.2412109375}},
{"tr", []float64{39.375, 45.0, 56.25, 67.5}},
{"4wmddz9mgk38", []float64{-54.3620882928, -54.3620881252, -59.6429172903, -59.6429169551}},
{"wkcdsn8nbz", []float64{27.1951049566, 27.195110321, 103.535188437, 103.535199165}},
{"1r198wxj", []float64{-50.3247642517, -50.3245925903, -121.609039307, -121.608695984}},
{"eu", []float64{22.5, 28.125, -11.25, 0.0}},
{"k2y7mk6sd0wz", []float64{-40.1858386584, -40.1858384907, 20.2733035013, 20.2733038366}},
{"gms9ytw4v5", []float64{76.2758177519, 76.2758231163, -27.1277761459, -27.1277654171}},
{"2vdkc", []float64{-13.2275390625, -13.18359375, -143.041992188, -142.998046875}},
{"bke7fx3", []float64{71.011505127, 71.012878418, -164.068450928, -164.067077637}},
{"tvnxu7jt8", []float64{29.5047283173, 29.5047712326, 88.0849456787, 88.0849885941}},
{"f864yp3c", []float64{46.9296455383, 46.9298171997, -64.4214248657, -64.421081543}},
{"g8hxr7x150d7", []float64{46.2938149832, 46.2938151509, -15.8435266837, -15.8435263485}},
{"zmk4kmh", []float64{74.9542236328, 74.9555969238, 152.067260742, 152.068634033}},
{"gtqsvep4", []float64{75.3830337524, 75.3832054138, -13.1080627441, -13.1077194214}},
{"trsvy0", []float64{43.1982421875, 43.2037353516, 63.193359375, 63.2043457031}},
{"bevjfs", []float64{67.1264648438, 67.1319580078, -150.358886719, -150.347900391}},
{"ktrb", []float64{-15.46875, -15.29296875, 33.3984375, 33.75}},
{"dn20q1pv", []float64{35.2065467834, 35.2067184448, -89.7256851196, -89.7253417969}},
{"8n3wy5g2", []float64{36.3633728027, 36.3635444641, -177.622489929, -177.622146606}},
{"vyzft", []float64{83.408203125, 83.4521484375, 89.8681640625, 89.912109375}},
{"gwuedjbs8222", []float64{83.6163438857, 83.6163440533, -16.0832866654, -16.0832863301}},
{"fpb89dkktj83", []float64{88.6948023923, 88.6948025599, -89.2249056324, -89.2249052972}},
{"wjjk3", []float64{28.8720703125, 28.916015625, 97.4267578125, 97.470703125}},
{"wx6", []float64{40.78125, 42.1875, 115.3125, 116.71875}},
{"yzpuuv3w0pw", []float64{85.2398702502, 85.2398715913, 134.859245718, 134.859247059}},
{"k2518ucy", []float64{-44.7092056274, -44.7090339661, 15.5041122437, 15.5044555664}},
{"hkjk1075", []float64{-66.7949867249, -66.7948150635, 18.6808776855, 18.6812210083}},
{"btmd", []float64{74.8828125, 75.05859375, -149.765625, -149.4140625}},
{"ucbvmyuw9z", []float64{55.8048337698, 55.8048391342, 35.0636279583, 35.0636386871}},
{"wf86ytd34", []float64{14.5762825012, 14.5763254166, 124.390382767, 124.390425682}},
{"9zjbws3u", []float64{39.4869232178, 39.4870948792, -92.8760147095, -92.8756713867}},
{"rqc", []float64{-7.03125, -5.625, 147.65625, 149.0625}},
{"pwqw8gh8x74", []float64{-53.6845904589, -53.6845891178, 166.680077612, 166.680078954}},
{"5ekn", []float64{-70.6640625, -70.48828125, -16.875, -16.5234375}},
{"mxx0b3mzwxp", []float64{-2.67247259617, -2.67247125506, 77.3629210889, 77.36292243}},
{"tn", []float64{33.75, 39.375, 45.0, 56.25}},
{"ju59u9b8grr", []float64{-67.1826021373, -67.1826007962, 83.8704644144, 83.8704657555}},
{"hmte6v6jzq84", []float64{-58.4613495693, -58.4613494016, 19.1082823277, 19.1082826629}},
{"t3", []float64{5.625, 11.25, 56.25, 67.5}},
{"es9g6", []float64{25.8837890625, 25.927734375, -19.951171875, -19.9072265625}},
{"2bwcdk6y", []float64{-41.8994522095, -41.8992805481, -136.655158997, -136.654815674}},
{"4ew3jn8umh", []float64{-70.1002621651, -70.1002568007, -58.4899663925, -58.4899556637}},
{"0meekufm4x3", []float64{-58.4642212093, -58.4642198682, -163.616186231, -163.61618489}},
{"02", []float64{-90.0, -84.375, -168.75, -157.5}},
{"9yuv0t43sv0", []float64{38.8754063845, 38.8754077256, -94.5450460911, -94.54504475}},
{"u0g7y8444", []float64{49.8782730103, 49.8783159256, 4.85878944397, 4.85883235931}},
{"r4", []float64{-33.75, -28.125, 135.0, 146.25}},
{"1ps631dwde", []float64{-47.4076205492, -47.4076151848, -128.975951672, -128.975940943}},
{"vbmfdm", []float64{46.8731689453, 46.8786621094, 86.9348144531, 86.9458007812}},
{"s7gmxm8vhn5v", []float64{22.0916506089, 22.0916507766, 16.1401226744, 16.1401230097}},
{"mcwrh68t", []float64{-35.317440033, -35.3172683716, 87.7265167236, 87.7268600464}},
{"9yt5645jq9r9", []float64{37.145683486, 37.1456836537, -94.1264504939, -94.1264501587}},
{"61t1x3z", []float64{-36.2892150879, -36.2878417969, -82.6405334473, -82.6391601562}},
{"wmzkmbm", []float64{33.0921936035, 33.0935668945, 111.704864502, 111.706237793}},
{"jy", []float64{-56.25, -50.625, 78.75, 90.0}},
{"9ctckntmvgkr", []float64{8.69393778965, 8.69393795729, -92.9808190092, -92.980818674}},
{"8rnugbss4gd", []float64{40.2134129405, 40.2134142816, -159.086717069, -159.086715728}},
{"ek8", []float64{25.3125, 26.71875, -33.75, -32.34375}},
{"nvqegbu", []float64{-59.8054504395, -59.8040771484, 133.060913086, 133.062286377}},
{"h0u7v6pujp5", []float64{-85.1103597879, -85.1103584468, 6.21813699603, 6.21813833714}},
{"2rj", []float64{-5.625, -4.21875, -161.71875, -160.3125}},
{"jk38", []float64{-66.09375, -65.91796875, 58.359375, 58.7109375}},
{"c67qe30m", []float64{58.8051795959, 58.8053512573, -119.036521912, -119.036178589}},
{"xhtwk", []float64{26.4111328125, 26.455078125, 142.91015625, 142.954101562}},
{"pw6ue97rjn11", []float64{-54.0446339361, -54.0446337685, 161.525675207, 161.525675543}},
{"xpmw424pk6", []float64{41.8371927738, 41.8371981382, 142.836180925, 142.836191654}},
{"rk", []float64{-22.5, -16.875, 146.25, 157.5}},
{"hmfmn5uwdh2", []float64{-56.755605787, -56.7556044459, 14.6840000153, 14.6840013564}},
{"defvfj", []float64{22.1319580078, 22.1374511719, -63.544921875, -63.5339355469}},
{"sg7jd89w", []float64{19.2518234253, 19.2519950867, 38.0806732178, 38.0810165405}},
{"yn0", []float64{78.75, 80.15625, 90.0, 91.40625}},
{"3n7re6gv2e92", []float64{-8.50936442614, -8.5093642585, -130.281692259, -130.281691924}},
{"vyj", []float64{78.75, 80.15625, 85.78125, 87.1875}},
{"9cntsz", []float64{6.63024902344, 6.6357421875, -91.9006347656, -91.8896484375}},
{"w4uwq3d", []float64{16.5756225586, 16.5769958496, 96.6055297852, 96.6069030762}},
{"zffrf", []float64{61.8310546875, 61.875, 172.001953125, 172.045898438}},
{"hq6142s", []float64{-54.665222168, -54.663848877, 14.1668701172, 14.1682434082}},
{"m7srp977", []float64{-24.0746498108, -24.0744781494, 62.5606155396, 62.5609588623}},
{"8v6hxy3sz", []float64{30.3574132919, 30.3574562073, -143.094563484, -143.094520569}},
{"rf4snrh70h", []float64{-33.0078864098, -33.0078810453, 172.54611969, 172.546130419}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"ssr6y", []float64{24.3896484375, 24.43359375, 32.958984375, 33.0029296875}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"yz", []float64{84.375, 90.0, 123.75, 135.0}},
{"n2", []float64{-90.0, -84.375, 101.25, 112.5}},
{"vfe", []float64{59.0625, 60.46875, 82.96875, 84.375}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"z3", []float64{50.625, 56.25, 146.25, 157.5}},
{"z2web5usz", []float64{48.4930944443, 48.4931373596, 155.397105217, 155.397148132}},
{"8gkc", []float64{18.45703125, 18.6328125, -139.5703125, -139.21875}},
{"17de4", []float64{-69.78515625, -69.7412109375, -120.146484375, -120.102539062}},
{"ky71b3fwd2vv", []float64{-9.52539911494, -9.5253989473, 37.9832738265, 37.9832741618}},
{"e4gfcm", []float64{15.9796142578, 15.9851074219, -39.6716308594, -39.6606445312}},
{"kdgwxnnb", []float64{-28.3557128906, -28.3555412292, 27.7387619019, 27.7391052246}},
{"4nyn2chy", []float64{-50.9260940552, -50.9259223938, -81.5230178833, -81.5226745605}},
{"u6je3kk57", []float64{56.8451929092, 56.8452358246, 19.0449285507, 19.0449714661}},
{"nrs9j1hj5tj", []float64{-47.630340457, -47.6303391159, 107.803501636, 107.803502977}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"krqrcc2hm9uq", []float64{-2.84883890301, -2.84883873537, 20.116208531, 20.1162088662}},
{"fmceu", []float64{78.0029296875, 78.046875, -76.46484375, -76.4208984375}},
{"q3w576", []float64{-35.9802246094, -35.9747314453, 109.830322266, 109.841308594}},
{"19ehn", []float64{-80.859375, -80.8154296875, -108.017578125, -107.973632812}},
{"zpkvjk", []float64{86.6821289062, 86.6876220703, 141.910400391, 141.921386719}},
{"7cgwy9jm", []float64{-33.9633750916, -33.9632034302, -6.03527069092, -6.03492736816}},
{"jju1cefk5v1", []float64{-57.3273199797, -57.3273186386, 50.6941701472, 50.6941714883}},
{"5tfpq6", []float64{-56.3708496094, -56.3653564453, -19.4128417969, -19.4018554688}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"dm0vh8", []float64{29.00390625, 29.0093994141, -77.4975585938, -77.4865722656}},
{"jt5c8hhd58b", []float64{-61.5890081227, -61.5890067816, 72.7797675133, 72.7797688544}},
{"4ttr", []float64{-57.83203125, -57.65625, -60.1171875, -59.765625}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"sp2d3v6wz", []float64{41.2067556381, 41.2067985535, 0.783762931824, 0.783805847168}},
{"up1yhbspqy", []float64{85.4337108135, 85.4337161779, 2.67546057701, 2.67547130585}},
{"1z4bx8dp1ex1", []float64{-50.5331422202, -50.5331420526, -97.0504023135, -97.0504019782}},
{"76qbnz5n1937", []float64{-32.3042606749, -32.3042605072, -23.9569957182, -23.9569953829}},
{"0w1bvcjb40xd", []float64{-56.112667881, -56.1126677133, -154.778384641, -154.778384306}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"yqbjwyvsmc", []float64{83.9733606577, 83.9733660221, 101.554430723, 101.554441452}},
{"0hdp2tdy5", []float64{-63.3818435669, -63.3818006516, -177.161622047, -177.161579132}},
{"s8yv34v", []float64{5.15670776367, 5.15808105469, 32.0429992676, 32.0443725586}},
{"uc60k7w6", []float64{52.0947647095, 52.0949363708, 36.757850647, 36.7581939697}},
{"en6h7me8", []float64{35.9335327148, 35.9337043762, -42.0398712158, -42.0395278931}},
{"1bks34", []float64{-87.8356933594, -87.8302001953, -94.8779296875, -94.8669433594}},
{"65", []float64{-28.125, -22.5, -90.0, -78.75}},
{"qwrquvr", []float64{-8.62838745117, -8.62701416016, 122.913665771, 122.915039062}},
{"3dmchcusw83", []float64{-32.1575818956, -32.1575805545, -104.198862165, -104.198860824}},
{"urfeh", []float64{89.12109375, 89.1650390625, 14.94140625, 14.9853515625}},
{"d3g6rs", []float64{10.2612304688, 10.2667236328, -73.8500976562, -73.8391113281}},
{"wx0v", []float64{40.25390625, 40.4296875, 113.5546875, 113.90625}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"et", []float64{28.125, 33.75, -22.5, -11.25}},
{"dqtd0", []float64{36.9140625, 36.9580078125, -71.015625, -70.9716796875}},
{"vtzuwhxhgjv8", []float64{78.1603311002, 78.1603312679, 78.6718585342, 78.6718588695}},
{"bn9", []float64{81.5625, 82.96875, -178.59375, -177.1875}},
{"685n", []float64{-43.9453125, -43.76953125, -63.28125, -62.9296875}},
{"fqy4nhy", []float64{83.3464050293, 83.3477783203, -70.0405883789, -70.0392150879}},
{"5q1dw", []float64{-55.810546875, -55.7666015625, -31.376953125, -31.3330078125}},
{"0sv8m11dgkf", []float64{-63.2313139737, -63.2313126326, -149.543696344, -149.543695003}},
{"vp435nn", []float64{84.5837402344, 84.5851135254, 48.3041381836, 48.3055114746}},
{"wydj", []float64{37.44140625, 37.6171875, 126.5625, 126.9140625}},
{"ebg8", []float64{4.21875, 4.39453125, -6.328125, -5.9765625}},
{"ksmb6pfxe4", []float64{-21.0059344769, -21.0059291124, 30.6773900986, 30.6774008274}},
{"8bv63qv96dp", []float64{4.65156197548, 4.65156331658, -138.804586083, -138.804584742}},
{"0s0d53hd", []float64{-67.1426010132, -67.1424293518, -156.647872925, -156.647529602}},
{"1yjf34ubpm", []float64{-55.8393591642, -55.8393537998, -93.1132829189, -93.1132721901}},
{"823y79hmfe", []float64{2.51137912273, 2.51138448715, -166.129310131, -166.129299402}},
{"xynnrv", []float64{34.8760986328, 34.8815917969, 177.528076172, 177.5390625}},
{"9b9ejqcqqdu", []float64{3.37801024318, 3.37801158428, -98.9079111814, -98.9079098403}},
{"cuuhfkd", []float64{72.5784301758, 72.5798034668, -95.5233764648, -95.5220031738}},
{"khwceh", []float64{-19.4018554688, -19.3963623047, 9.6240234375, 9.63500976562}},
{"z8vub32sy6", []float64{50.061403513, 50.0614088774, 165.597878695, 165.597889423}},
{"4s", []float64{-67.5, -61.875, -67.5, -56.25}},
{"bsrb4qeqt7eq", []float64{68.9430911466, 68.9430913143, -146.497992687, -146.497992352}},
{"b1x0sc", []float64{53.5308837891, 53.5363769531, -169.947509766, -169.936523438}},
{"1ngn2bn2vub", []float64{-50.9324629605, -50.9324616194, -130.739461184, -130.739459842}},
{"bsm", []float64{68.90625, 70.3125, -150.46875, -149.0625}},
{"xyzd7", []float64{38.3642578125, 38.408203125, 179.428710938, 179.47265625}},
{"cvvf1tqs", []float64{77.7248382568, 77.7250099182, -93.0892181396, -93.0888748169}},
{"fz2tpb1xj", []float64{86.6613578796, 86.661400795, -55.2040243149, -55.2039813995}},
{"r4zqr7", []float64{-28.4161376953, -28.4106445312, 145.513916016, 145.524902344}},
{"wth", []float64{28.125, 29.53125, 118.125, 119.53125}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"4j0ff5xs58h", []float64{-61.3716888428, -61.3716875017, -88.8469666243, -88.8469652832}},
{"c15", []float64{50.625, 52.03125, -130.78125, -129.375}},
{"tnkgyg", []float64{35.8319091797, 35.8374023438, 51.9763183594, 51.9873046875}},
{"e99", []float64{8.4375, 9.84375, -21.09375, -19.6875}},
{"bcrz69", []float64{53.3111572266, 53.3166503906, -135.241699219, -135.230712891}},
{"e5w29f6", []float64{19.7877502441, 19.7891235352, -36.1312866211, -36.1299133301}},
{"gykjjcq656b5", []float64{81.0423812829, 81.0423814505, -5.36359190941, -5.36359157413}},
{"7j62nzypd3be", []float64{-15.4248806275, -15.4248804599, -41.5309696645, -41.5309693292}},
{"rrsgbbvwt9r7", []float64{-2.14807743207, -2.14807726443, 152.970445342, 152.970445678}},
{"93kub", []float64{7.8662109375, 7.91015625, -117.0703125, -117.026367188}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"02bcjtw5f84", []float64{-85.5746126175, -85.5746112764, -167.445263565, -167.445262223}},
{"262dqsqs1", []float64{-31.9242095947, -31.9241666794, -167.752261162, -167.752218246}},
{"185ss806c", []float64{-89.2085123062, -89.2084693909, -107.379984856, -107.37994194}},
{"9s6", []float64{23.90625, 25.3125, -109.6875, -108.28125}},
{"ych1d25e", []float64{50.8891868591, 50.8893585205, 129.478683472, 129.479026794}},
{"7f2qcykht0d", []float64{-31.1221191287, -31.1221177876, -10.8158227801, -10.815821439}},
{"5gk7qw", []float64{-71.1145019531, -71.1090087891, -4.98779296875, -4.97680664062}},
{"7kjutjpu98", []float64{-21.6807460785, -21.6807407141, -25.4336285591, -25.4336178303}},
{"h64", []float64{-78.75, -77.34375, 14.0625, 15.46875}},
{"uy57", []float64{79.27734375, 79.453125, 38.3203125, 38.671875}},
{"r5dtqkydk796", []float64{-24.3631505594, -24.3631503917, 138.799393661, 138.799393997}},
{"5j", []float64{-61.875, -56.25, -45.0, -33.75}},
{"xzbszzeu", []float64{44.4705963135, 44.4707679749, 169.798851013, 169.799194336}},
{"wqjz0fj5e", []float64{34.9920558929, 34.9920988083, 109.375891685, 109.375934601}},
{"dekz", []float64{19.51171875, 19.6875, -60.8203125, -60.46875}},
{"bbyux", []float64{50.009765625, 50.0537109375, -136.450195312, -136.40625}},
{"rctysz36", []float64{-35.3797531128, -35.3795814514, 177.046394348, 177.046737671}},
{"xmhvqm1wcw7", []float64{29.0765096247, 29.0765109658, 153.206474036, 153.206475377}},
{"nhw6c", []float64{-64.2041015625, -64.16015625, 98.8330078125, 98.876953125}},
{"u9d3gdu", []float64{53.7602233887, 53.7615966797, 25.8233642578, 25.8247375488}},
{"xenu1tyd0q", []float64{17.6100862026, 17.610091567, 167.067042589, 167.067053318}},
{"qm70t", []float64{-15.380859375, -15.3369140625, 105.688476562, 105.732421875}},
{"3g0", []float64{-28.125, -26.71875, -101.25, -99.84375}},
{"fg", []float64{61.875, 67.5, -56.25, -45.0}},
{"jq1tn6nn0hfs", []float64{-55.3590513021, -55.3590511344, 58.642276302, 58.6422766373}},
{"b9kmw", []float64{52.998046875, 53.0419921875, -151.259765625, -151.215820312}},
{"z7f9pj6", []float64{66.2983703613, 66.2997436523, 150.07598877, 150.077362061}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"hqbvrz4x", []float64{-51.0687446594, -51.068572998, 12.6486968994, 12.6490402222}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"81es", []float64{9.140625, 9.31640625, -175.078125, -174.7265625}},
{"nyn6mf2", []float64{-55.8421325684, -55.8407592773, 132.791748047, 132.793121338}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"46hfqnuv", []float64{-78.3165550232, -78.3163833618, -71.8001174927, -71.7997741699}},
{"7tne", []float64{-16.34765625, -16.171875, -13.359375, -13.0078125}},
{"z7pkduh4g2c", []float64{62.6884643734, 62.6884657145, 156.571796089, 156.571797431}},
{"xgmm", []float64{19.16015625, 19.3359375, 176.1328125, 176.484375}},
{"he054x", []float64{-72.5592041016, -72.5537109375, 22.6098632812, 22.6208496094}},
{"rqnu21r8g", []float64{-10.4959344864, -10.495891571, 155.752615929, 155.752658844}},
{"k8xg8n36", []float64{-41.5375900269, -41.5374183655, 33.4001541138, 33.4004974365}},
{"d60md", []float64{12.216796875, 12.2607421875, -78.310546875, -78.2666015625}},
{"50tqctv", []float64{-85.9693908691, -85.9680175781, -37.5444030762, -37.5430297852}},
{"yxknv54eqnz", []float64{86.984847039, 86.9848483801, 118.34842667, 118.348428011}},
{"5zpczbcjgj", []float64{-50.3122490644, -50.3122437, -0.00948429107666, -0.0094735622406}},
{"yp7nz0rr7d8", []float64{86.9704046845, 86.9704060256, 94.5364737511, 94.5364750922}},
{"kfjb9s772f", []float64{-33.6381947994, -33.638189435, 41.9063508511, 41.9063615799}},
{"q", []float64{-45.0, 0.0, 90.0, 135.0}},
{"cx1f", []float64{84.7265625, 84.90234375, -110.0390625, -109.6875}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"hdtcdg62524", []float64{-75.6559753418, -75.6559740007, 30.7100191712, 30.7100205123}},
{"fpy32", []float64{88.8134765625, 88.857421875, -81.2109375, -81.1669921875}},
{"256qs", []float64{-25.576171875, -25.5322265625, -176.66015625, -176.616210938}},
{"6rgzd89v3r48", []float64{-0.0842052698135, -0.0842051021755, -73.3642389625, -73.3642386273}},
{"c8430hd23", []float64{45.2005434036, 45.200586319, -109.33280468, -109.332761765}},
{"xtn", []float64{28.125, 29.53125, 165.9375, 167.34375}},
{"1u2s9", []float64{-65.302734375, -65.2587890625, -100.502929688, -100.458984375}},
{"5c80k35m35p", []float64{-81.512144208, -81.5121428668, -11.058716923, -11.0587155819}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"u7c0rfsdjn", []float64{66.1518037319, 66.1518090963, 13.0032205582, 13.003231287}},
{"f4ce8gy", []float64{61.1045837402, 61.1059570312, -87.8494262695, -87.8480529785}},
{"pbd4y02", []float64{-86.7027282715, -86.7013549805, 171.826171875, 171.827545166}},
{"9n", []float64{33.75, 39.375, -135.0, -123.75}},
{"ztybk5c8mw", []float64{77.4083697796, 77.408375144, 167.170264721, 167.17027545}},
{"ks8pv0cv5u6j", []float64{-18.3201934956, -18.320193328, 22.7222934365, 22.7222937718}},
{"nuh", []float64{-67.5, -66.09375, 129.375, 130.78125}},
{"6khcf5xumxz", []float64{-22.1723856032, -22.1723842621, -71.9715334475, -71.9715321064}},
{"nj2", []float64{-60.46875, -59.0625, 90.0, 91.40625}},
{"mdd459nebt64", []float64{-30.5797721073, -30.5797719397, 70.4752591252, 70.4752594605}},
{"e7ujxh", []float64{22.0825195312, 22.0880126953, -27.8173828125, -27.8063964844}},
{"eb1zvy9n", []float64{1.39904022217, 1.39921188354, -8.53500366211, -8.53466033936}},
{"1huxmt", []float64{-61.9793701172, -61.9738769531, -128.430175781, -128.419189453}},
{"v4cyrgyg", []float64{61.5884971619, 61.5886688232, 47.8107833862, 47.811126709}},
{"j6h0dvut", []float64{-78.6296653748, -78.6294937134, 62.0020294189, 62.0023727417}},
{"r8j", []float64{-45.0, -43.59375, 164.53125, 165.9375}},
{"feyj018b7", []float64{66.9809389114, 66.9809818268, -59.0613412857, -59.0612983704}},
{"tw33887htf7j", []float64{35.4220805503, 35.422080718, 69.2841558158, 69.2841561511}},
{"3rbgkj8z4cy8", []float64{-0.803537517786, -0.803537350148, -122.518374547, -122.518374212}},
{"gq30gnn9dg", []float64{80.3213185072, 80.3213238716, -32.2028696537, -32.2028589249}},
{"rek4qy2", []float64{-26.2889099121, -26.2875366211, 163.421630859, 163.42300415}},
{"j8", []float64{-90.0, -84.375, 67.5, 78.75}},
{"f2rev1d7", []float64{47.0741844177, 47.0743560791, -67.9803085327, -67.97996521}},
{"rw1sg2f78x", []float64{-10.4102808237, -10.4102754593, 159.755308628, 159.755319357}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"8xfw", []float64{44.6484375, 44.82421875, -153.984375, -153.6328125}},
{"fj20", []float64{74.53125, 74.70703125, -90.0, -89.6484375}},
{"m76w867pt9yj", []float64{-25.5625145696, -25.562514402, 59.7809752822, 59.7809756175}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"wdb4v9vwt4ez", []float64{15.9628918581, 15.9628920257, 112.749471925, 112.74947226}},
{"m53rvw", []float64{-25.3234863281, -25.3179931641, 46.9995117188, 47.0104980469}},
{"f33", []float64{52.03125, 53.4375, -77.34375, -75.9375}},
{"0t36bun98", []float64{-59.9631214142, -59.9630784988, -155.700302124, -155.700259209}},
{"ezs1", []float64{42.36328125, 42.5390625, -5.625, -5.2734375}},
{"jb", []float64{-90.0, -84.375, 78.75, 90.0}},
{"vd0k", []float64{56.953125, 57.12890625, 67.8515625, 68.203125}},
{"39cqr9bd", []float64{-34.0476608276, -34.0474891663, -110.411911011, -110.411567688}},
{"n8r", []float64{-88.59375, -87.1875, 122.34375, 123.75}},
{"1b6", []float64{-88.59375, -87.1875, -98.4375, -97.03125}},
{"358wn7v3tch", []float64{-24.2369502783, -24.2369489372, -134.014754891, -134.01475355}},
{"d506n2v77qf", []float64{17.2312764823, 17.2312778234, -89.366427362, -89.3664260209}},
{"qb", []float64{-45.0, -39.375, 123.75, 135.0}},
{"sn", []float64{33.75, 39.375, 0.0, 11.25}},
{"5rj", []float64{-50.625, -49.21875, -26.71875, -25.3125}},
{"0y51c", []float64{-55.9423828125, -55.8984375, -141.987304688, -141.943359375}},
{"r85z8", []float64{-43.681640625, -43.6376953125, 162.7734375, 162.817382812}},
{"rk5z", []float64{-21.26953125, -21.09375, 151.5234375, 151.875}},
{"vzn2", []float64{84.375, 84.55078125, 87.5390625, 87.890625}},
{"bjvk", []float64{78.046875, 78.22265625, -172.6171875, -172.265625}},
{"b9f", []float64{54.84375, 56.25, -154.6875, -153.28125}},
{"q0u1y3mu4k", []float64{-40.4660582542, -40.4660528898, 95.907651186, 95.9076619148}},
{"r7", []float64{-28.125, -22.5, 146.25, 157.5}},
{"cv90dz31", []float64{76.0653877258, 76.0655593872, -99.7215270996, -99.7211837769}},
{"gxfeekgv9sng", []float64{89.2360430025, 89.2360431701, -18.8363294676, -18.8363291323}},
{"92t", []float64{2.8125, 4.21875, -116.71875, -115.3125}},
{"m06", []float64{-43.59375, -42.1875, 47.8125, 49.21875}},
{"n27p3f3c", []float64{-87.306804657, -87.3066329956, 105.548057556, 105.548400879}},
{"jjptjwvt2", []float64{-60.9581136703, -60.958070755, 55.7961273193, 55.7961702347}},
{"u258hxwr", []float64{45.0424003601, 45.0425720215, 16.3782119751, 16.3785552979}},
{"t47", []float64{12.65625, 14.0625, 49.21875, 50.625}},
{"e", []float64{0.0, 45.0, -45.0, 0.0}},
{"0s", []float64{-67.5, -61.875, -157.5, -146.25}},
{"4drwu7v94g3", []float64{-76.1364381015, -76.1364367604, -56.758684963, -56.7586836219}},
{"wk14pc", []float64{22.8570556641, 22.8625488281, 102.996826172, 103.0078125}},
{"w5xg", []float64{20.21484375, 20.390625, 100.8984375, 101.25}},
{"f2z2dvf7", []float64{49.3387413025, 49.3389129639, -68.4307479858, -68.4304046631}},
{"duk0212sgtp", []float64{23.9579039812, 23.9579053223, -50.6241537631, -50.624152422}},
{"h7z", []float64{-68.90625, -67.5, 21.09375, 22.5}},
{"31k02yzy90", []float64{-37.8866100311, -37.8866046667, -129.331355095, -129.331344366}},
{"vus0k", []float64{70.3564453125, 70.400390625, 84.55078125, 84.5947265625}},
{"5m0sjm4rrvf", []float64{-61.1431337893, -61.1431324482, -32.8127369285, -32.8127355874}},
{"4syd3n9", []float64{-62.8500366211, -62.8486633301, -58.3140563965, -58.3126831055}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"g", []float64{45.0, 90.0, -45.0, 0.0}},
{"ntn69zcu", []float64{-61.392288208, -61.3921165466, 121.368370056, 121.368713379}},
{"vnc4m", []float64{83.3642578125, 83.408203125, 46.6259765625, 46.669921875}},
{"sy848jtk7wdj", []float64{37.0329307951, 37.0329309627, 33.7573626637, 33.757362999}},
{"ry7y", []float64{-8.7890625, -8.61328125, 174.0234375, 174.375}},
{"5k7gesef08j", []float64{-65.453453064, -65.4534517229, -28.3175759017, -28.3175745606}},
{"n5gnev", []float64{-67.7362060547, -67.7307128906, 94.3835449219, 94.39453125}},
{"kz3eeg1trf7", []float64{-3.58612284064, -3.58612149954, 36.0265664756, 36.0265678167}},
{"t55rxfr7", []float64{18.2062339783, 18.2064056396, 49.9208450317, 49.9211883545}},
{"tm9", []float64{30.9375, 32.34375, 57.65625, 59.0625}},
{"pjw", []float64{-59.0625, -57.65625, 143.4375, 144.84375}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"2my", []float64{-12.65625, -11.25, -160.3125, -158.90625}},
{"3w6bwyt", []float64{-9.72015380859, -9.71878051758, -108.329315186, -108.327941895}},
{"v4s8r3q", []float64{59.1133117676, 59.1146850586, 51.6549682617, 51.6563415527}},
{"ggx8be488kyn", []float64{64.8359277472, 64.8359279148, -0.677700340748, -0.677700005472}},
{"95w", []float64{19.6875, 21.09375, -126.5625, -125.15625}},
{"9jck7spubf", []float64{33.1136190891, 33.1136244535, -133.077703714, -133.077692986}},
{"f1zfe", []float64{55.283203125, 55.3271484375, -78.9697265625, -78.92578125}},
{"p5ycnk9j", []float64{-68.7048912048, -68.7047195435, 144.768218994, 144.768562317}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"zk", []float64{67.5, 73.125, 146.25, 157.5}},
{"rkq7z", []float64{-20.4345703125, -20.390625, 155.346679688, 155.390625}},
{"j5wsc3t4", []float64{-69.4689559937, -69.4687843323, 54.2024230957, 54.2027664185}},
{"56szw", []float64{-74.619140625, -74.5751953125, -26.806640625, -26.7626953125}},
{"6xp21s7", []float64{-5.60165405273, -5.60028076172, -57.2346496582, -57.2332763672}},
{"gbmymdgc5x", []float64{47.520198226, 47.5202035904, -2.91706323624, -2.9170525074}},
{"bmm935md5m", []float64{74.7691994905, 74.769204855, -160.963987112, -160.963976383}},
{"z2v55d8", []float64{49.7598266602, 49.7611999512, 153.435058594, 153.436431885}},
{"q232vgb0", []float64{-43.4413146973, -43.4411430359, 103.260498047, 103.26084137}},
{"b7kf7b5uz4", []float64{63.6775839329, 63.6775892973, -161.900067329, -161.900056601}},
{"y6", []float64{56.25, 61.875, 101.25, 112.5}},
{"3xdf2ts4d", []float64{-2.38635063171, -2.38630771637, -108.605260849, -108.605217934}},
{"0szy1551", []float64{-62.2099113464, -62.2097396851, -146.553497314, -146.553153992}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"kk86s", []float64{-19.248046875, -19.2041015625, 11.77734375, 11.8212890625}},
{"tqq", []float64{35.15625, 36.5625, 64.6875, 66.09375}},
{"3gcy97b", []float64{-22.7430725098, -22.7416992188, -98.7341308594, -98.7327575684}},
{"5dnz5z1d", []float64{-77.4807357788, -77.4805641174, -12.8409576416, -12.8406143188}},
{"eumv1pe", []float64{24.8263549805, 24.8277282715, -3.11599731445, -3.11462402344}},
{"2kxmbgqtfc3", []float64{-18.6579112709, -18.6579099298, -158.512682766, -158.512681425}},
{"hc5uf211rpb", []float64{-83.5397829115, -83.5397815704, 39.1239881516, 39.1239894927}},
{"p4khbd21", []float64{-76.496257782, -76.4960861206, 140.646972656, 140.647315979}},
{"xuq", []float64{23.90625, 25.3125, 177.1875, 178.59375}},
{"gn63cf91nhf", []float64{80.47779724, 80.4777985811, -41.7573997378, -41.7573983967}},
{"5hd1nh3", []float64{-64.4883728027, -64.4869995117, -41.922454834, -41.921081543}},
{"ustjn5", []float64{71.2078857422, 71.2133789062, 29.794921875, 29.8059082031}},
{"btv2", []float64{77.34375, 77.51953125, -150.1171875, -149.765625}},
{"7ds5rkh3u1", []float64{-30.3439325094, -30.343927145, -16.5503883362, -16.5503776073}},
{"54", []float64{-78.75, -73.125, -45.0, -33.75}},
{"7sr", []float64{-21.09375, -19.6875, -12.65625, -11.25}},
{"kr552fbb", []float64{-5.03860473633, -5.03843307495, 15.5027389526, 15.5030822754}},
{"8uc453nv4qq4", []float64{27.0766978338, 27.0766980015, -144.691553414, -144.691553079}},
{"6n3m", []float64{-8.96484375, -8.7890625, -88.2421875, -87.890625}},
{"rptzf8", []float64{-1.4501953125, -1.44470214844, 143.195800781, 143.206787109}},
{"x63b", []float64{12.65625, 12.83203125, 148.7109375, 149.0625}},
{"qwj7353", []float64{-10.6608581543, -10.6594848633, 119.928131104, 119.929504395}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"v6", []float64{56.25, 61.875, 56.25, 67.5}},
{"5mpwtp", []float64{-60.6939697266, -60.6884765625, -22.9833984375, -22.9724121094}},
{"ukgs", []float64{72.421875, 72.59765625, 16.171875, 16.5234375}},
{"qxyb4g6qf", []float64{-1.3872385025, -1.38719558716, 122.116212845, 122.11625576}},
{"qhww7zdb5v", []float64{-18.5476416349, -18.5476362705, 99.3093574047, 99.3093681335}},
{"wfwffcw5vd", []float64{14.5547926426, 14.554798007, 133.37151289, 133.371523619}},
{"rynp", []float64{-10.01953125, -9.84375, 177.1875, 177.5390625}},
{"fb57ykxryn82", []float64{45.6852641702, 45.6852643378, -51.3948151097, -51.3948147744}},
{"30sq", []float64{-41.1328125, -40.95703125, -129.0234375, -128.671875}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"e5gjz7", []float64{22.1209716797, 22.1264648438, -40.4626464844, -40.4516601562}},
{"t6vkbx", []float64{16.3421630859, 16.34765625, 63.6547851562, 63.6657714844}},
{"3e", []float64{-28.125, -22.5, -112.5, -101.25}},
{"th", []float64{22.5, 28.125, 45.0, 56.25}},
{"7j", []float64{-16.875, -11.25, -45.0, -33.75}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"1r15z2z5", []float64{-49.9611854553, -49.9610137939, -122.015533447, -122.015190125}},
{"k9e57kg2f3", []float64{-35.9649842978, -35.9649789333, 26.866132021, 26.8661427498}},
{"4w", []float64{-56.25, -50.625, -67.5, -56.25}},
{"82c582qhsx7", []float64{4.83616903424, 4.83617037535, -167.324326783, -167.324325442}},
{"qzdzkwwdc", []float64{-1.50190830231, -1.50186538696, 127.823910713, 127.823953629}},
{"6xn26p6fnr0", []float64{-5.54084837437, -5.54084703326, -58.6190021038, -58.6190007627}},
{"wm4y0xu8ee", []float64{29.2223614454, 29.2223668098, 105.14549017, 105.145500898}},
{"rke5rewv", []float64{-19.0961265564, -19.095954895, 150.807609558, 150.807952881}},
{"0bn3vwcjj", []float64{-89.6544456482, -89.6544027328, -137.217650414, -137.217607498}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"red", []float64{-25.3125, -23.90625, 160.3125, 161.71875}},
{"r2", []float64{-45.0, -39.375, 146.25, 157.5}},
{"v7qptt2u5k", []float64{64.6291565895, 64.6291619539, 64.9303686619, 64.9303793907}},
{"pey", []float64{-68.90625, -67.5, 165.9375, 167.34375}},
{"r7cg5cz8", []float64{-23.3692932129, -23.3691215515, 148.886032104, 148.886375427}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"qqr5mxz", []float64{-9.22988891602, -9.228515625, 111.345062256, 111.346435547}},
{"wuuvdz7x2", []float64{27.7266168594, 27.7266597748, 130.555343628, 130.555386543}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"4mgwkd9yp3r5", []float64{-56.5428471006, -56.542846933, -73.6276473105, -73.6276469752}},
{"dpk6jbemhyvh", []float64{41.1364542693, 41.1364544369, -83.7660782039, -83.7660778686}},
{"768py6th4818", []float64{-29.5607757568, -29.5607755892, -33.4683660418, -33.4683657065}},
{"q8", []float64{-45.0, -39.375, 112.5, 123.75}},
{"xmgvmq9cf", []float64{33.3026075363, 33.3026504517, 151.756639481, 151.756682396}},
{"6rhhdy3", []float64{-4.79965209961, -4.79827880859, -73.0027770996, -73.0014038086}},
{"05dd6myj4xqj", []float64{-69.884508457, -69.8845082894, -176.377142966, -176.377142631}},
{"yw6", []float64{80.15625, 81.5625, 115.3125, 116.71875}},
{"6mt6h4jrw", []float64{-13.6986637115, -13.6986207962, -71.1839389801, -71.1838960648}},
{"86ymqv2s", []float64{16.4211273193, 16.4212989807, -159.663619995, -159.663276672}},
{"vygq9vr6umc", []float64{84.1406701505, 84.1406714916, 83.4073568881, 83.4073582292}},
{"89g6sp8p81", []float64{10.3256946802, 10.3257000446, -152.75390625, -152.753895521}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"6st", []float64{-19.6875, -18.28125, -60.46875, -59.0625}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"sk", []float64{22.5, 28.125, 11.25, 22.5}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"f3pf69r61v4", []float64{51.0277444124, 51.0277457535, -67.7316650748, -67.7316637337}},
{"r9qde0cj", []float64{-37.5243186951, -37.5241470337, 166.773834229, 166.774177551}},
{"nbm5pqh", []float64{-88.0334472656, -88.0320739746, 131.10534668, 131.106719971}},
{"u7f9fh2dzpw9", []float64{66.4252256043, 66.425225772, 14.8545113951, 14.8545117304}},
{"pnzr19j29", []float64{-50.7952022552, -50.7951593399, 145.268483162, 145.268526077}},
{"3e7rf8ww4fe5", []float64{-25.3526548482, -25.3526546806, -107.810775787, -107.810775451}},
{"1x", []float64{-50.625, -45.0, -112.5, -101.25}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"4h4swejxpzv", []float64{-66.6912616789, -66.6912603378, -86.1908380687, -86.1908367276}},
{"hy6gnf0u", []float64{-54.3047332764, -54.304561615, 37.9148483276, 37.9151916504}},
{"xnycujudf", []float64{38.3084249496, 38.308467865, 144.67423439, 144.674277306}},
{"t6bypmux9z54", []float64{16.5563485399, 16.5563487075, 57.6295499504, 57.6295502856}},
{"ufw1c6xp2t", []float64{59.3851214647, 59.3851268291, 42.2520661354, 42.2520768642}},
{"42jpxwe9byn", []float64{-88.6456024647, -88.6456011236, -71.3843134046, -71.3843120635}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"tj0de", []float64{28.564453125, 28.6083984375, 45.8349609375, 45.87890625}},
{"e0kqd", []float64{2.548828125, 2.5927734375, -38.935546875, -38.8916015625}},
{"bzysgj0rr", []float64{89.4574213028, 89.4574642181, -136.976895332, -136.976852417}},
{"bxdp8ycgtcqt", []float64{88.543546591, 88.5435467586, -154.651882276, -154.651881941}},
{"k0kx2785", []float64{-42.2995948792, -42.2994232178, 6.33911132812, 6.33945465088}},
{"75ugg", []float64{-23.2470703125, -23.203125, -38.1884765625, -38.14453125}},
{"sbbsbv2r", []float64{5.08375167847, 5.08392333984, 34.4864273071, 34.4867706299}},
{"u7vunvq7rn", []float64{66.8263041973, 66.8263095617, 19.6414518356, 19.6414625645}},
{"w4m7uexcx", []float64{13.3349132538, 13.3349561691, 97.591509819, 97.5915527344}},
{"350g", []float64{-27.59765625, -27.421875, -133.9453125, -133.59375}},
{"p", []float64{-90.0, -45.0, 135.0, 180.0}},
{"t8w2g1m1", []float64{2.95137405396, 2.95154571533, 76.4277648926, 76.4281082153}},
{"96k738f", []float64{13.2316589355, 13.2330322266, -117.704772949, -117.703399658}},
{"c26nv174", []float64{47.5999832153, 47.6001548767, -120.713653564, -120.713310242}},
{"s67g9ehvds", []float64{13.2889294624, 13.2889348269, 16.5959858894, 16.5959966183}},
{"4ybt4sw", []float64{-51.1276245117, -51.1262512207, -55.4287719727, -55.4273986816}},
{"5jqnz9zqyk12", []float64{-59.2714333534, -59.2714331858, -36.2226838991, -36.2226835638}},
{"31d5nguy", []float64{-36.0135269165, -36.0133552551, -131.884346008, -131.884002686}},
{"m4bbcg", []float64{-29.3829345703, -29.3774414062, 46.1315917969, 46.142578125}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"mkx0hzzq", []float64{-19.6438980103, -19.6437263489, 66.3124465942, 66.312789917}},
{"9bv7x0", []float64{4.833984375, 4.83947753906, -93.5595703125, -93.5485839844}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"0x7pv0083h", []float64{-47.8563809395, -47.8563755751, -153.060793877, -153.060783148}},
{"ruqzw0ztgw", []float64{-19.7702515125, -19.7702461481, 178.516309261, 178.51631999}},
{"xpy5c3c", []float64{44.2625427246, 44.2639160156, 143.493804932, 143.495178223}},
{"bnn6fxqm", []float64{79.2740821838, 79.2742538452, -171.09249115, -171.092147827}},
{"fnm9u1t", []float64{80.4721069336, 80.4734802246, -82.0829772949, -82.0816040039}},
{"jfvn54", []float64{-73.4655761719, -73.4600830078, 85.9130859375, 85.9240722656}},
{"dj", []float64{28.125, 33.75, -90.0, -78.75}},
{"mxfstk2s", []float64{-0.591201782227, -0.59103012085, 71.2470245361, 71.2473678589}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"4xy4p", []float64{-46.0546875, -46.0107421875, -58.7548828125, -58.7109375}},
{"cbdg47d", []float64{48.3590698242, 48.3604431152, -97.2811889648, -97.2798156738}},
{"1ddv9", []float64{-74.970703125, -74.9267578125, -108.588867188, -108.544921875}},
{"4cn07cd", []float64{-84.3228149414, -84.3214416504, -47.6449584961, -47.6435852051}},
{"dq8fjtv64n", []float64{36.9460237026, 36.946029067, -77.4463176727, -77.4463069439}},
{"gx2qc4", []float64{86.9787597656, 86.9842529297, -22.1044921875, -22.0935058594}},
{"yx", []float64{84.375, 90.0, 112.5, 123.75}},
{"44", []float64{-78.75, -73.125, -90.0, -78.75}},
{"zz679sbs", []float64{86.4232635498, 86.4234352112, 171.980667114, 171.981010437}},
{"2fdh2u769u7y", []float64{-30.1666307822, -30.1666306145, -143.399997689, -143.399997354}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"11cxx", []float64{-78.837890625, -78.7939453125, -132.583007812, -132.5390625}},
{"ygf2", []float64{66.09375, 66.26953125, 126.9140625, 127.265625}},
{"m0uscc", []float64{-39.9407958984, -39.9353027344, 51.4050292969, 51.416015625}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"kn2m", []float64{-8.96484375, -8.7890625, 0.3515625, 0.703125}},
{"k56d043m45", []float64{-26.3539534807, -26.3539481163, 3.51742744446, 3.51743817329}},
{"m7", []float64{-28.125, -22.5, 56.25, 67.5}},
{"pwe", []float64{-53.4375, -52.03125, 161.71875, 163.125}},
{"1j7q0hs8u", []float64{-59.3892145157, -59.3891716003, -130.423336029, -130.423293114}},
{"f264077wzn", []float64{46.776856184, 46.7768615484, -75.9214067459, -75.9213960171}},
{"2jvn", []float64{-11.6015625, -11.42578125, -172.96875, -172.6171875}},
{"4d4ghmzzsjb9", []float64{-78.1897520833, -78.1897519156, -63.4352295846, -63.4352292493}},
{"1h2n6vef0we", []float64{-64.9645265937, -64.9645252526, -134.873975068, -134.873973727}},
{"vnfc1v2yh", []float64{83.1744003296, 83.1744432449, 48.9452934265, 48.9453363419}},
{"2b5brk", []float64{-44.9340820312, -44.9285888672, -140.657958984, -140.646972656}},
{"yntb", []float64{81.5625, 81.73828125, 98.0859375, 98.4375}},
{"5fj85yy", []float64{-78.7129211426, -78.7115478516, -3.34259033203, -3.34121704102}},
{"cm36wevqn", []float64{74.9923324585, 74.9923753738, -121.699075699, -121.699032784}},
{"7hf52n", []float64{-17.6770019531, -17.6715087891, -42.1875, -42.1765136719}},
{"dh1sz", []float64{23.3349609375, 23.37890625, -87.5830078125, -87.5390625}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"7ny5k0k28", []float64{-6.4585018158, -6.45845890045, -36.3808822632, -36.3808393478}},
{"w7", []float64{16.875, 22.5, 101.25, 112.5}},
{"f9j0296wjz", []float64{50.6768792868, 50.6768846512, -60.443097353, -60.4430866241}},
{"v316n9gu7y5", []float64{50.9869372845, 50.9869386256, 58.2987718284, 58.2987731695}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"0jt97bxt", []float64{-58.8391685486, -58.8389968872, -172.090530396, -172.090187073}},
{"cfdbvrc", []float64{59.236907959, 59.23828125, -97.1507263184, -97.1493530273}},
{"95d", []float64{19.6875, 21.09375, -132.1875, -130.78125}},
{"my", []float64{-11.25, -5.625, 78.75, 90.0}},
{"5t", []float64{-61.875, -56.25, -22.5, -11.25}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"k2j", []float64{-45.0, -43.59375, 18.28125, 19.6875}},
{"rjygh", []float64{-12.12890625, -12.0849609375, 144.66796875, 144.711914062}},
{"xs79f", []float64{24.2138671875, 24.2578125, 162.509765625, 162.553710938}},
{"nmz3x305vedq", []float64{-57.3864214495, -57.3864212818, 111.764155068, 111.764155403}},
{"u1hq", []float64{51.6796875, 51.85546875, 5.9765625, 6.328125}},
{"2v8jpgs92zxm", []float64{-13.1641120277, -13.1641118601, -145.903202109, -145.903201774}},
{"f54n04uq", []float64{62.9458236694, 62.9459953308, -87.1816635132, -87.1813201904}},
{"8zkv13p8", []float64{41.6656494141, 41.6658210754, -139.505081177, -139.504737854}},
{"z03j6ktm2", []float64{47.354722023, 47.3547649384, 136.512336731, 136.512379646}},
{"qd266u", []float64{-31.9262695312, -31.9207763672, 112.972412109, 112.983398438}},
{"783q5", []float64{-42.5390625, -42.4951171875, -20.6103515625, -20.56640625}},
{"ynqe7fy5s9m6", []float64{80.7432531193, 80.7432532869, 99.3138598278, 99.3138601631}},
{"pp", []float64{-50.625, -45.0, 135.0, 146.25}},
{"bd", []float64{56.25, 61.875, -157.5, -146.25}},
{"8573xzmt2qy", []float64{18.5856847465, 18.5856860876, -175.081539452, -175.081538111}},
{"gwhbzrftu0", []float64{78.9253950119, 78.9254003763, -15.4981040955, -15.4980933666}},
{"h42w9e805", []float64{-76.1819458008, -76.1819028854, 0.769171714783, 0.769214630127}},
{"uzbhd66135d1", []float64{89.397358764, 89.3973589316, 33.8516691327, 33.851669468}},
{"zf", []float64{56.25, 61.875, 168.75, 180.0}},
{"6q2r", []float64{-8.61328125, -8.4375, -78.3984375, -78.046875}},
{"qtxy7v4w9", []float64{-12.9352855682, -12.9352426529, 123.566708565, 123.56675148}},
{"58", []float64{-90.0, -84.375, -22.5, -11.25}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"8u9qjb5qw", []float64{26.368303299, 26.3683462143, -144.234781265, -144.23473835}},
{"48sx", []float64{-85.95703125, -85.78125, -61.171875, -60.8203125}},
{"690tdt6", []float64{-38.3793640137, -38.3779907227, -66.6842651367, -66.6828918457}},
{"qm8", []float64{-14.0625, -12.65625, 101.25, 102.65625}},
{"2mj", []float64{-16.875, -15.46875, -161.71875, -160.3125}},
{"3e5", []float64{-28.125, -26.71875, -108.28125, -106.875}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"f6dg1tndqxy", []float64{59.6177373827, 59.6177387238, -74.8076811433, -74.8076798022}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"9f3eqkhmf", []float64{13.2504987717, 13.250541687, -98.8600444794, -98.860001564}},
{"yuqghk1x5z", []float64{69.4568055868, 69.4568109512, 133.431175947, 133.431186676}},
{"w30rynrjmy", []float64{7.02257037163, 7.02257573605, 101.875094175, 101.875104904}},
{"m25ergh6", []float64{-44.4118881226, -44.4117164612, 61.5182876587, 61.5186309814}},
{"jqznhq", []float64{-50.9436035156, -50.9381103516, 66.2805175781, 66.2915039062}},
{"3u4", []float64{-22.5, -21.09375, -98.4375, -97.03125}},
{"fzr", []float64{85.78125, 87.1875, -46.40625, -45.0}},
{"je", []float64{-73.125, -67.5, 67.5, 78.75}},
{"pmztf5q7e7d4", []float64{-56.6270351037, -56.6270349361, 156.893490851, 156.893491186}},
{"xdknsz", []float64{13.8372802734, 13.8427734375, 163.333740234, 163.344726562}},
{"736h1c7d5h", []float64{-37.2583937645, -37.2583884001, -30.8556604385, -30.8556497097}},
{"c57pmmv9u", []float64{64.5875501633, 64.5875930786, -130.542812347, -130.542769432}},
{"80qnjsh", []float64{2.48291015625, 2.48428344727, -171.315307617, -171.313934326}},
{"kp", []float64{-5.625, 0.0, 0.0, 11.25}},
{"u6yufbthbdw", []float64{61.3072863221, 61.3072876632, 20.8699330688, 20.8699344099}},
{"c4yj93f16ys", []float64{61.4454093575, 61.4454106987, -126.504698396, -126.504697055}},
{"ygkp", []float64{64.51171875, 64.6875, 129.375, 129.7265625}},
{"h1ypf", []float64{-78.7939453125, -78.75, 8.525390625, 8.5693359375}},
{"hz76my5h4qe", []float64{-48.7895616889, -48.7895603478, 38.5772185028, 38.5772198439}},
{"zh1cw", []float64{67.763671875, 67.8076171875, 137.724609375, 137.768554688}},
{"00", []float64{-90.0, -84.375, -180.0, -168.75}},
{"h9be5u0k", []float64{-79.6062469482, -79.6060752869, 23.3682632446, 23.3686065674}},
{"4btv", []float64{-86.30859375, -86.1328125, -48.1640625, -47.8125}},
{"42hz7pm83dt", []float64{-88.6857041717, -88.6857028306, -71.9308523834, -71.9308510423}},
{"qv49", []float64{-16.69921875, -16.5234375, 127.265625, 127.6171875}},
{"0nwzwd", []float64{-52.1081542969, -52.1026611328, -170.222167969, -170.211181641}},
{"jhkmc", []float64{-65.0830078125, -65.0390625, 51.0205078125, 51.064453125}},
{"sysens0py", []float64{37.1131467819, 37.1131896973, 40.3640270233, 40.3640699387}},
{"5q792kbf", []float64{-54.5975875854, -54.5974159241, -28.8161087036, -28.8157653809}},
{"624gbe", []float64{-44.3243408203, -44.3188476562, -74.8608398438, -74.8498535156}},
{"gtkjfqg", []float64{75.5790710449, 75.5804443359, -16.7720031738, -16.7706298828}},
{"nv4", []float64{-61.875, -60.46875, 126.5625, 127.96875}},
{"dcwv6uu0", []float64{9.3864440918, 9.38661575317, -46.6314697266, -46.6311264038}},
{"vvgtyfv9", []float64{78.36977005, 78.3699417114, 83.97605896, 83.9764022827}},
{"53rjpqr6zt9", []float64{-82.0550099015, -82.0550085604, -23.5773669183, -23.5773655772}},
{"vmyp4dnemp6", []float64{78.5858018696, 78.5858032107, 64.8065069318, 64.8065082729}},
{"t9xqjxjpv", []float64{9.53197002411, 9.53201293945, 77.9440927505, 77.9441356659}},
{"sby32e", []float64{4.45495605469, 4.46044921875, 42.5610351562, 42.5720214844}},
{"sjfgy", []float64{33.0029296875, 33.046875, 4.130859375, 4.1748046875}},
{"k7q0z2b2du", []float64{-26.5826869011, -26.5826815367, 20.0065648556, 20.0065755844}},
{"nt", []float64{-61.875, -56.25, 112.5, 123.75}},
{"1", []float64{-90.0, -45.0, -135.0, -90.0}},
{"mpfpfd32e", []float64{-0.0314998626709, -0.0314569473267, 47.9242086411, 47.9242515564}},
{"hqjqn", []float64{-55.1953125, -55.1513671875, 18.896484375, 18.9404296875}},
{"9q7chj6u2uw", []float64{35.3616240621, 35.3616254032, -118.296964467, -118.296963125}},
{"0wsf47v9c", []float64{-53.0650377274, -53.064994812, -150.713839531, -150.713796616}},
{"kdv72env8xke", []float64{-28.9424979128, -28.9424977452, 29.9140823632, 29.9140826985}},
{"trfx", []float64{44.82421875, 45.0, 59.765625, 60.1171875}},
{"02uttm", []float64{-84.7869873047, -84.7814941406, -162.191162109, -162.180175781}},
{"hhjgb5s3vv39", []float64{-66.8212655, -66.8212653324, 8.0920227617, 8.09202309698}},
{"r16", []float64{-37.96875, -36.5625, 137.8125, 139.21875}},
{"4xy44eer4t3", []float64{-46.0342316329, -46.0342302918, -58.9480648935, -58.9480635524}},
{"8b", []float64{0.0, 5.625, -146.25, -135.0}},
{"zd", []float64{56.25, 61.875, 157.5, 168.75}},
{"z0x", []float64{47.8125, 49.21875, 144.84375, 146.25}},
{"4967", []float64{-82.44140625, -82.265625, -64.3359375, -63.984375}},
{"2vf4", []float64{-12.3046875, -12.12890625, -143.4375, -143.0859375}},
{"tzp3t0rtg", []float64{39.6410322189, 39.6410751343, 89.1754674911, 89.1755104065}},
{"75yry", []float64{-22.5439453125, -22.5, -35.947265625, -35.9033203125}},
{"bdgtu", []float64{61.4794921875, 61.5234375, -152.40234375, -152.358398438}},
{"u1", []float64{50.625, 56.25, 0.0, 11.25}},
{"rz2bgp7hds", []float64{-4.04629468918, -4.04628932476, 169.940750599, 169.940761328}},
{"g", []float64{45.0, 90.0, -45.0, 0.0}},
{"psptppx32e", []float64{-66.5796643496, -66.5796589851, 168.364470005, 168.364480734}},
{"gshfctpwf1", []float64{68.0120283365, 68.0120337009, -15.7440090179, -15.7439982891}},
{"3yq", []float64{-9.84375, -8.4375, -92.8125, -91.40625}},
{"685zv36e0fd2", []float64{-43.6303004622, -43.6303002946, -61.9923811778, -61.9923808426}},
{"7gf5fd", []float64{-23.2360839844, -23.2305908203, -8.32763671875, -8.31665039062}},
{"bmmzfq", []float64{75.9265136719, 75.9320068359, -160.565185547, -160.554199219}},
{"m40", []float64{-33.75, -32.34375, 45.0, 46.40625}},
{"tx45501g7v", []float64{39.9029284716, 39.902933836, 70.4469001293, 70.4469108582}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"ej054jn", []float64{28.6798095703, 28.6811828613, -44.9038696289, -44.9024963379}},
{"n1g7d", []float64{-79.541015625, -79.4970703125, 94.658203125, 94.7021484375}},
{"nn6ejehuf", []float64{-54.2991113663, -54.2990684509, 93.7639331818, 93.7639760971}},
{"qs8e93xwrrc", []float64{-19.0629114211, -19.06291008, 113.268668801, 113.268670142}},
{"f2", []float64{45.0, 50.625, -78.75, -67.5}},
{"gm", []float64{73.125, 78.75, -33.75, -22.5}},
{"npp4rnm", []float64{-50.1951599121, -50.1937866211, 100.158233643, 100.159606934}},
{"6t", []float64{-16.875, -11.25, -67.5, -56.25}},
{"2f4fe3d5", []float64{-33.3017921448, -33.3016204834, -142.237243652, -142.23690033}},
{"s7r00k0196", []float64{18.3034908772, 18.3034962416, 21.1047899723, 21.1048007011}},
{"st084", []float64{28.125, 28.1689453125, 23.291015625, 23.3349609375}},
{"p6f3bv9f1", []float64{-74.1930770874, -74.1930341721, 149.449467659, 149.449510574}},
{"fgk5j", []float64{63.80859375, 63.8525390625, -50.4052734375, -50.361328125}},
{"yeu22jjtp", []float64{66.1660194397, 66.166062355, 118.484416008, 118.484458923}},
{"3bcn7gkusn25", []float64{-39.6639578976, -39.6639577299, -99.6722602844, -99.6722599491}},
{"1", []float64{-90.0, -45.0, -135.0, -90.0}},
{"6q4dh71sqn", []float64{-10.8811962605, -10.881190896, -75.0452899933, -75.0452792645}},
{"p", []float64{-90.0, -45.0, 135.0, 180.0}},
{"6uy2kbef9yg", []float64{-18.2340927422, -18.2340914011, -47.2469682992, -47.246966958}},
{"58j", []float64{-90.0, -88.59375, -15.46875, -14.0625}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"mh6x", []float64{-19.86328125, -19.6875, 48.515625, 48.8671875}},
{"cgq4xrjz", []float64{63.7603569031, 63.7605285645, -92.486000061, -92.4856567383}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"ppqg570sk3n", []float64{-48.6741918325, -48.6741904914, 144.635886848, 144.635888189}},
{"1suyu1k", []float64{-62.0878601074, -62.0864868164, -105.639038086, -105.637664795}},
{"xm4xkzsnvux", []float64{29.4417956471, 29.4417969882, 149.980114549, 149.980115891}},
{"8p3xj", []float64{42.01171875, 42.0556640625, -177.670898438, -177.626953125}},
{"ef92nkk", []float64{14.0858459473, 14.0872192383, -9.21203613281, -9.2106628418}},
{"qnrf101", []float64{-9.4921875, -9.49081420898, 100.943756104, 100.945129395}},
{"2qt", []float64{-8.4375, -7.03125, -161.71875, -160.3125}},
{"c2q7e", []float64{47.021484375, 47.0654296875, -114.829101562, -114.78515625}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"j6t0bwpjpusg", []float64{-75.7718221284, -75.7718219608, 63.3131746575, 63.3131749928}},
{"wrq", []float64{40.78125, 42.1875, 109.6875, 111.09375}},
{"xvf2jk", []float64{32.3657226562, 32.3712158203, 172.144775391, 172.155761719}},
{"xy0p5y", []float64{35.0134277344, 35.0189208984, 168.914794922, 168.92578125}},
{"bsh9xbd", []float64{67.766418457, 67.767791748, -150.828552246, -150.827178955}},
{"g675yc", []float64{58.3209228516, 58.3264160156, -29.2346191406, -29.2236328125}},
{"dkrnq7", []float64{25.0213623047, 25.0268554688, -68.6315917969, -68.6206054688}},
{"6q4uk3dyk5", []float64{-10.4936009645, -10.4935956001, -74.6920967102, -74.6920859814}},
{"t58tp1kxb54p", []float64{20.5746203475, 20.5746205151, 46.0169246793, 46.0169250146}},
{"bbw73yzeuy", []float64{48.4215438366, 48.421549201, -137.373529673, -137.373518944}},
{"gnq", []float64{80.15625, 81.5625, -36.5625, -35.15625}},
{"3j", []float64{-16.875, -11.25, -135.0, -123.75}},
{"7dx2c", []float64{-30.8056640625, -30.76171875, -12.2607421875, -12.216796875}},
{"vn9", []float64{81.5625, 82.96875, 46.40625, 47.8125}},
{"4kj", []float64{-67.5, -66.09375, -71.71875, -70.3125}},
{"cuvj4trg5nb8", []float64{72.6270465553, 72.6270467229, -94.0981142968, -94.0981139615}},
{"uetmbuswe", []float64{65.7240772247, 65.7241201401, 29.92208004, 29.9221229553}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"jg", []float64{-73.125, -67.5, 78.75, 90.0}},
{"ycz", []float64{54.84375, 56.25, 133.59375, 135.0}},
{"pevtd", []float64{-67.939453125, -67.8955078125, 165.322265625, 165.366210938}},
{"gf7fm3hmb8", []float64{58.0582380295, 58.0582433939, -5.73999166489, -5.73998093605}},
{"w7zwjnh24qd", []float64{22.1814313531, 22.1814326942, 112.022537291, 112.022538632}},
{"nfesgy", []float64{-75.0695800781, -75.0640869141, 128.836669922, 128.84765625}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"efq0q0dg0g0n", []float64{12.7034739777, 12.7034741454, -2.5450193882, -2.54501905292}},
{"kkucr8pk", []float64{-18.060836792, -18.0606651306, 18.2692337036, 18.2695770264}},
{"5zdg4zvz", []float64{-47.2413825989, -47.2412109375, -7.25406646729, -7.25372314453}},
{"fuw", []float64{70.3125, 71.71875, -47.8125, -46.40625}},
{"x51mnftp8", []float64{17.7689266205, 17.7689695358, 137.061309814, 137.06135273}},
{"y0", []float64{45.0, 50.625, 90.0, 101.25}},
{"ndufku4sr", []float64{-74.1130399704, -74.1129970551, 119.392161369, 119.392204285}},
{"ydwndhywhg8", []float64{60.232219398, 60.2322207391, 121.034520864, 121.034522206}},
{"gj6ehkq0", []float64{75.0819396973, 75.0821113586, -41.2893676758, -41.289024353}},
{"m3hfct0", []float64{-38.8641357422, -38.8627624512, 62.9956054688, 62.9969787598}},
{"6745yupp70qu", []float64{-27.4426010996, -27.442600932, -75.631118305, -75.6311179698}},
{"d7b0m9dzj213", []float64{21.1471368559, 21.1471370235, -78.504297249, -78.5042969137}},
{"py", []float64{-56.25, -50.625, 168.75, 180.0}},
{"4vhrpypw", []float64{-60.6105422974, -60.610370636, -49.9225616455, -49.9222183228}},
{"xwyyj1xz5kzw", []float64{39.0329053625, 39.0329055302, 167.222706601, 167.222706936}},
{"18ht0j2w8ste", []float64{-89.0911141969, -89.0911140293, -106.171159521, -106.171159185}},
{"vynwqurve", []float64{79.8729228973, 79.8729658127, 88.1980276108, 88.1980705261}},
{"s77hhn", []float64{19.0173339844, 19.0228271484, 15.64453125, 15.6555175781}},
{"hj66tgs86", []float64{-60.0100278854, -60.0099849701, 3.42301368713, 3.42305660248}},
{"e5nh4k", []float64{17.6000976562, 17.6055908203, -36.4636230469, -36.4526367188}},
{"jk", []float64{-67.5, -61.875, 56.25, 67.5}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"f0p3v5", []float64{45.3240966797, 45.3295898438, -79.5849609375, -79.5739746094}},
{"numc175r1", []float64{-65.9002876282, -65.9002447128, 131.895375252, 131.895418167}},
{"7pc", []float64{-1.40625, 0.0, -43.59375, -42.1875}},
{"b7qw82mfqc4", []float64{64.4255930185, 64.4255943596, -159.590199888, -159.590198547}},
{"qfe", []float64{-30.9375, -29.53125, 127.96875, 129.375}},
{"mw9kj6nrue61", []float64{-7.72204069421, -7.72204052657, 69.4973042607, 69.497304596}},
{"6en5d6psj", []float64{-27.4980926514, -27.498049736, -58.9531087875, -58.9530658722}},
{"mk80", []float64{-19.6875, -19.51171875, 56.25, 56.6015625}},
{"d2fbpmpyjv", []float64{4.24727261066, 4.24727797508, -74.5533192158, -74.5533084869}},
{"pf84wbwguh9", []float64{-75.4946324229, -75.4946310818, 169.056073576, 169.056074917}},
{"ncj8287", []float64{-84.3296813965, -84.3283081055, 131.510467529, 131.51184082}},
{"smd4t", []float64{31.376953125, 31.4208984375, 14.2822265625, 14.326171875}},
{"4ryj3jjxrd", []float64{-45.4546773434, -45.454671979, -70.2606797218, -70.260668993}},
{"udffsxnn8", []float64{60.9477710724, 60.9478139877, 26.5731811523, 26.5732240677}},
{"cub7vr", []float64{72.4163818359, 72.421875, -100.667724609, -100.656738281}},
{"y7c6s4", []float64{66.5441894531, 66.5496826172, 103.18359375, 103.194580078}},
{"t253", []float64{0.17578125, 0.3515625, 60.8203125, 61.171875}},
{"1e2bhmk9ybw", []float64{-71.6896077991, -71.6896064579, -111.252067387, -111.252066046}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"99r75", []float64{7.55859375, 7.6025390625, -102.172851562, -102.12890625}},
{"knbr2kzz1791", []float64{-5.72952283546, -5.72952266783, 0.373246818781, 0.373247154057}},
{"v8h", []float64{45.0, 46.40625, 73.125, 74.53125}},
{"sm6xvf3bc", []float64{30.9060430527, 30.906085968, 15.0207567215, 15.0207996368}},
{"vu", []float64{67.5, 73.125, 78.75, 90.0}},
{"w56htc6", []float64{19.0791320801, 19.0805053711, 93.0679321289, 93.0693054199}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"5hm57j8", []float64{-65.4922485352, -65.4908752441, -37.8369140625, -37.8355407715}},
{"k8qwr0e", []float64{-42.4923706055, -42.4909973145, 31.9523620605, 31.9537353516}},
{"716e0", []float64{-37.44140625, -37.3974609375, -41.484375, -41.4404296875}},
{"wz71b", []float64{41.0888671875, 41.1328125, 127.96875, 128.012695312}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"89x2", []float64{8.4375, 8.61328125, -147.3046875, -146.953125}},
{"rcr37p", []float64{-37.7105712891, -37.705078125, 179.077148438, 179.088134766}},
{"4xzjc7vmstr", []float64{-45.3739361465, -45.3739348054, -57.5939060748, -57.5939047337}},
{"tv07ndf8", []float64{28.6674499512, 28.6676216125, 79.3906402588, 79.3909835815}},
{"qb2z", []float64{-42.36328125, -42.1875, 124.8046875, 125.15625}},
{"xjq0fjferr", []float64{29.6952670813, 29.6952724457, 143.529134989, 143.529145718}},
{"zwn6", []float64{79.1015625, 79.27734375, 166.2890625, 166.640625}},
{"7xc", []float64{-1.40625, 0.0, -21.09375, -19.6875}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"vw4sy8z", []float64{79.5890808105, 79.5904541016, 71.3108825684, 71.3122558594}},
{"djg8s3pre", []float64{32.4384212494, 32.4384641647, -84.881272316, -84.8812294006}},
{"vpn8t", []float64{84.462890625, 84.5068359375, 54.3603515625, 54.404296875}},
{"1sse8x6", []float64{-64.0324401855, -64.0310668945, -106.147155762, -106.145782471}},
{"snm4", []float64{35.5078125, 35.68359375, 7.03125, 7.3828125}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"5rb", []float64{-46.40625, -45.0, -33.75, -32.34375}},
{"q7", []float64{-28.125, -22.5, 101.25, 112.5}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"hn5sw9rbvjk3", []float64{-55.4519608431, -55.4519606754, 5.21838281304, 5.21838314831}},
{"y0f7w5tep1f", []float64{49.8537348211, 49.8537361622, 93.4355905652, 93.4355919063}},
{"xts0uk", []float64{31.0913085938, 31.0968017578, 163.311767578, 163.322753906}},
{"ftybqfey", []float64{77.4024581909, 77.4026298523, -57.7060317993, -57.7056884766}},
{"eqvhf", []float64{38.8037109375, 38.84765625, -26.630859375, -26.5869140625}},
{"ctpbp73", []float64{73.1428527832, 73.1442260742, -101.281585693, -101.280212402}},
{"15czhy35", []float64{-67.6409339905, -67.6407623291, -132.328948975, -132.328605652}},
{"1", []float64{-90.0, -45.0, -135.0, -90.0}},
{"vk6mwwrysbf", []float64{69.9084989727, 69.9085003138, 59.7105565667, 59.7105579078}},
{"4yfcbqvevqy", []float64{-51.6858740151, -51.685872674, -52.3640397191, -52.364038378}},
{"d6qm1q41g", []float64{13.5684156418, 13.5684585571, -69.9031305313, -69.903087616}},
{"kjqew1cty", []float64{-14.842915535, -14.8428726196, 9.40661430359, 9.40665721893}},
{"hf9zn39ewerv", []float64{-74.6981724165, -74.6981722489, 36.4879449829, 36.4879453182}},
{"1j", []float64{-61.875, -56.25, -135.0, -123.75}},
{"u41", []float64{56.25, 57.65625, 1.40625, 2.8125}},
{"pd8sbu5sk", []float64{-75.0798368454, -75.0797939301, 158.241062164, 158.24110508}},
{"k7", []float64{-28.125, -22.5, 11.25, 22.5}},
{"fx6xcm", []float64{87.1710205078, 87.1765136719, -63.9294433594, -63.9184570312}},
{"k1nwc4mun", []float64{-38.1754302979, -38.1753873825, 9.19272422791, 9.19276714325}},
{"nechx1mg", []float64{-68.1078529358, -68.1076812744, 114.221763611, 114.222106934}},
{"8et6dbj4g", []float64{20.1274251938, 20.1274681091, -149.98934269, -149.989299774}},
{"7e", []float64{-28.125, -22.5, -22.5, -11.25}},
{"vqcthybtw0", []float64{83.885679245, 83.8856846094, 58.5690593719, 58.5690701008}},
{"r6qdv32n", []float64{-31.8524551392, -31.8522834778, 155.621337891, 155.621681213}},
{"tbhh", []float64{0.703125, 0.87890625, 84.375, 84.7265625}},
{"0c5fpu", []float64{-84.0014648438, -83.9959716797, -140.635986328, -140.625}},
{"7b", []float64{-45.0, -39.375, -11.25, 0.0}},
{"9vzmkfvug", []float64{33.2825231552, 33.2825660706, -90.8379220963, -90.8378791809}},
{"68t", []float64{-42.1875, -40.78125, -60.46875, -59.0625}},
{"ef1szshm45h", []float64{12.1078079939, 12.107809335, -8.80510747433, -8.80510613322}},
{"21dgj4", []float64{-36.0241699219, -36.0186767578, -175.913085938, -175.902099609}},
{"109q9yt", []float64{-86.0092163086, -86.0078430176, -133.158416748, -133.157043457}},
{"nhj3b9vc", []float64{-67.182598114, -67.1824264526, 97.4126815796, 97.4130249023}},
{"nye5uzgb", []float64{-52.735748291, -52.7355766296, 128.182640076, 128.182983398}},
{"dhz5f", []float64{27.3779296875, 27.421875, -80.068359375, -80.0244140625}},
{"g1verehd", []float64{55.4318618774, 55.4320335388, -36.9298553467, -36.9295120239}},
{"jtr", []float64{-60.46875, -59.0625, 77.34375, 78.75}},
{"m5nbruj", []float64{-28.0590820312, -28.0577087402, 54.839630127, 54.841003418}},
{"p", []float64{-90.0, -45.0, 135.0, 180.0}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"bm3gr", []float64{75.1025390625, 75.146484375, -165.981445312, -165.9375}},
{"e7my1m0qp", []float64{19.3644332886, 19.3644762039, -25.6084871292, -25.6084442139}},
{"fzue", []float64{89.12109375, 89.296875, -49.921875, -49.5703125}},
{"q70", []float64{-28.125, -26.71875, 101.25, 102.65625}},
{"sjeed", []float64{31.552734375, 31.5966796875, 5.009765625, 5.0537109375}},
{"cvsuyyw", []float64{76.8081665039, 76.8095397949, -94.2654418945, -94.2640686035}},
{"7dnp", []float64{-32.51953125, -32.34375, -14.0625, -13.7109375}},
{"tf9kr1u", []float64{14.8191833496, 14.8205566406, 80.8209228516, 80.8222961426}},
{"j38nduwqew", []float64{-80.3940546513, -80.3940492868, 56.3795828819, 56.3795936108}},
{"444y82r", []float64{-77.606048584, -77.604675293, -86.1122131348, -86.1108398438}},
{"1rwzsww", []float64{-46.4584350586, -46.4570617676, -114.051818848, -114.050445557}},
{"98vu", []float64{4.921875, 5.09765625, -104.4140625, -104.0625}},
{"f0hu79k2y84", []float64{45.7540655136, 45.7540668547, -83.1603857875, -83.1603844464}},
{"35399zm2gnn", []float64{-26.415091753, -26.4150904119, -132.806374133, -132.806372792}},
{"qzxy", []float64{-1.7578125, -1.58203125, 134.6484375, 135.0}},
{"7gpr25", []float64{-26.8341064453, -26.8286132812, -1.0546875, -1.04370117188}},
{"xucdp", []float64{27.0703125, 27.1142578125, 171.166992188, 171.2109375}},
{"db3mpnz89zq", []float64{2.32235983014, 2.32236117125, -54.1741874814, -54.1741861403}},
{"p", []float64{-90.0, -45.0, 135.0, 180.0}},
{"94m52", []float64{13.2275390625, 13.271484375, -127.96875, -127.924804688}},
{"u7ucp", []float64{66.26953125, 66.3134765625, 18.2373046875, 18.28125}},
{"81qq43p", []float64{8.09143066406, 8.09280395508, -171.10244751, -171.101074219}},
{"f80w8", []float64{46.142578125, 46.1865234375, -66.796875, -66.7529296875}},
{"8j5z", []float64{29.35546875, 29.53125, -174.7265625, -174.375}},
{"56q", []float64{-77.34375, -75.9375, -25.3125, -23.90625}},
{"b72vvhj", []float64{64.3139648438, 64.3153381348, -167.468719482, -167.467346191}},
{"5j", []float64{-61.875, -56.25, -45.0, -33.75}},
{"42hm9tj0rn", []float64{-89.0056622028, -89.0056568384, -72.7003526688, -72.7003419399}},
{"cbxx9q2c89", []float64{49.1654545069, 49.1654598713, -90.6471419334, -90.6471312046}},
{"43", []float64{-84.375, -78.75, -78.75, -67.5}},
{"rvmw", []float64{-14.4140625, -14.23828125, 176.484375, 176.8359375}},
{"jwmeyr4hj7", []float64{-54.1454154253, -54.1454100609, 75.5120050907, 75.5120158195}},
{"b3y", []float64{54.84375, 56.25, -160.3125, -158.90625}},
{"4y3n0e7u8j", []float64{-53.7704104185, -53.7704050541, -54.8166275024, -54.8166167736}},
{"m0k0x", []float64{-43.505859375, -43.4619140625, 50.9326171875, 50.9765625}},
{"2zc1v219ev8z", []float64{-1.09834464267, -1.09834447503, -144.610815234, -144.610814899}},
{"3rvj3ezyffez", []float64{-0.46162577346, -0.461625605822, -116.64206598, -116.642065644}},
{"35bdpq", []float64{-23.5217285156, -23.5162353516, -133.978271484, -133.967285156}},
{"qdqrzp2e7b", []float64{-30.9410619736, -30.9410566092, 121.597527266, 121.597537994}},
{"vmrsrejf", []float64{75.2951431274, 75.2953147888, 67.1343612671, 67.1347045898}},
{"up", []float64{84.375, 90.0, 0.0, 11.25}},
{"bzy", []float64{88.59375, 90.0, -137.8125, -136.40625}},
{"3rnm42gs62", []float64{-4.7412443161, -4.74123895168, -114.857157469, -114.85714674}},
{"yhekty3621c", []float64{71.1382435262, 71.1382448673, 94.8247160017, 94.8247173429}},
{"ektx", []float64{26.54296875, 26.71875, -26.015625, -25.6640625}},
{"9nxkb4u9f6", []float64{37.4128782749, 37.4128836393, -124.798411131, -124.798400402}},
{"fg", []float64{61.875, 67.5, -56.25, -45.0}},
{"66e4x10k68", []float64{-30.4918241501, -30.4918187857, -74.2231822014, -74.2231714725}},
{"me", []float64{-28.125, -22.5, 67.5, 78.75}},
{"r385f5q9", []float64{-35.8852958679, -35.8851242065, 146.346817017, 146.347160339}},
{"xbdc8wgn0", []float64{3.11428070068, 3.11432361603, 172.643280029, 172.643322945}},
{"74s", []float64{-30.9375, -29.53125, -39.375, -37.96875}},
{"dg8t7", []float64{20.6103515625, 20.654296875, -55.4150390625, -55.37109375}},
{"nf7", []float64{-77.34375, -75.9375, 127.96875, 129.375}},
{"6nzfqpxy", []float64{-6.59351348877, -6.59334182739, -78.8272476196, -78.8269042969}},
{"0ux06p9jktht", []float64{-64.6014270745, -64.6014269069, -136.31678693, -136.316786595}},
{"nb8pxznpjh", []float64{-85.8294653893, -85.8294600248, 124.099030495, 124.099041224}},
{"6qks2x97hzm", []float64{-9.05492708087, -9.05492573977, -72.3979751766, -72.3979738355}},
{"us6qrd43em", []float64{70.0161534548, 70.0161588192, 25.9968817234, 25.9968924522}},
{"tp2eh", []float64{41.30859375, 41.3525390625, 45.87890625, 45.9228515625}},
{"vcgf16q2", []float64{55.2076721191, 55.2078437805, 84.0869522095, 84.0872955322}},
{"qt15nkc82kz", []float64{-16.3214953244, -16.3214939833, 114.182988256, 114.182989597}},
{"t6t", []float64{14.0625, 15.46875, 63.28125, 64.6875}},
{"yx53b3kj32", []float64{84.6903848648, 84.6903902292, 117.086845636, 117.086856365}},
{"twqdxev4cx", []float64{35.6168121099, 35.6168174744, 76.9771456718, 76.9771564007}},
{"p", []float64{-90.0, -45.0, 135.0, 180.0}},
{"4gty084hgddy", []float64{-69.2569826916, -69.2569825239, -48.13918937, -48.1391890347}},
{"c7", []float64{61.875, 67.5, -123.75, -112.5}},
{"ywffc641", []float64{83.463306427, 83.4634780884, 116.424865723, 116.425209045}},
{"k0km", []float64{-42.71484375, -42.5390625, 5.9765625, 6.328125}},
{"17k4fg", []float64{-71.2188720703, -71.2133789062, -118.004150391, -117.993164062}},
{"9fr", []float64{12.65625, 14.0625, -91.40625, -90.0}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"h08scsx", []float64{-86.3278198242, -86.3264465332, 0.778656005859, 0.780029296875}},
{"8f8nq48", []float64{15.1748657227, 15.1762390137, -145.986328125, -145.984954834}},
{"hecr6qx49k0", []float64{-67.59567976, -67.5956784189, 24.3663561344, 24.3663574755}},
{"jn", []float64{-56.25, -50.625, 45.0, 56.25}},
{"qwx7j", []float64{-7.91015625, -7.8662109375, 122.915039062, 122.958984375}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"wxcj4", []float64{44.47265625, 44.5166015625, 113.994140625, 114.038085938}},
{"gw63h", []float64{80.33203125, 80.3759765625, -19.16015625, -19.1162109375}},
{"hp7b6f27tq6p", []float64{-49.1618095525, -49.1618093848, 5.3948584199, 5.39485875517}},
{"kd", []float64{-33.75, -28.125, 22.5, 33.75}},
{"fbweu", []float64{48.4716796875, 48.515625, -46.93359375, -46.8896484375}},
{"m1fcuue7nm1g", []float64{-34.8233712651, -34.8233710974, 49.080661498, 49.0806618333}},
{"h9j", []float64{-84.375, -82.96875, 29.53125, 30.9375}},
{"n9d3g5uv", []float64{-81.2334251404, -81.233253479, 115.80242157, 115.802764893}},
{"nhpp1spf", []float64{-66.247215271, -66.2470436096, 99.9203109741, 99.9206542969}},
{"7jg2w13b23h", []float64{-12.5614446402, -12.5614432991, -40.1635962725, -40.1635949314}},
{"6q4z0ebf3", []float64{-9.99854564667, -9.99850273132, -74.8597669601, -74.8597240448}},
{"sv9vqgeecr", []float64{31.8802589178, 31.8802642822, 36.5124285221, 36.5124392509}},
{"wqd4", []float64{36.9140625, 37.08984375, 104.0625, 104.4140625}},
{"bwqgj", []float64{80.68359375, 80.7275390625, -147.788085938, -147.744140625}},
{"hk73qx", []float64{-65.8355712891, -65.830078125, 16.1059570312, 16.1169433594}},
{"gdx1d6hr76v", []float64{59.3384175003, 59.3384188414, -12.5513903797, -12.5513890386}},
{"47czd", []float64{-67.587890625, -67.5439453125, -76.201171875, -76.1572265625}},
{"1kpebdk50", []float64{-66.8279457092, -66.8279027939, -113.17565918, -113.175616264}},
{"g3z7", []float64{55.37109375, 55.546875, -23.5546875, -23.203125}},
{"8x", []float64{39.375, 45.0, -157.5, -146.25}},
{"nczvrs", []float64{-79.2114257812, -79.2059326172, 134.978027344, 134.989013672}},
{"fbyjmwc6", []float64{50.1790237427, 50.1791954041, -47.5690841675, -47.5687408447}},
{"yhz41tus5wm", []float64{72.1026183665, 72.1026197076, 99.9160046875, 99.9160060287}},
{"uktff1pp0", []float64{70.8025932312, 70.8026361465, 19.4334411621, 19.4334840775}},
{"hrt8", []float64{-47.8125, -47.63671875, 18.984375, 19.3359375}},
{"b5vkzheshz3", []float64{66.9541557133, 66.9541570544, -172.304558605, -172.304557264}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"h6mvgwv1kmp", []float64{-76.2956875563, -76.2956862152, 19.4968043268, 19.4968056679}},
{"etzq7udz", []float64{33.4683036804, 33.4684753418, -12.1361160278, -12.1357727051}},
{"rf2x5pd6", []float64{-31.0717391968, -31.0715675354, 169.588050842, 169.588394165}},
{"k8kquxqbt2", []float64{-42.3673152924, -42.3673099279, 28.6838114262, 28.683822155}},
{"bz91jncb2c7", []float64{87.4004097283, 87.4004110694, -144.621583968, -144.621582627}},
{"3uk4y5r6sjwr", []float64{-20.5920389481, -20.5920387805, -95.3511917219, -95.3511913866}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"y95n1hd", []float64{51.7044067383, 51.7057800293, 116.765441895, 116.766815186}},
{"629k5b3kbkc", []float64{-41.4821608365, -41.4821594954, -76.8256638944, -76.8256625533}},
{"pp42st7vp5", []float64{-50.5073958635, -50.5073904991, 138.367266655, 138.367277384}},
{"u17e", []float64{52.55859375, 52.734375, 4.921875, 5.2734375}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"ex5w6znubms", []float64{40.5129298568, 40.5129311979, -17.447989583, -17.4479882419}},
{"8jsn", []float64{31.9921875, 32.16796875, -174.375, -174.0234375}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"mht9efkj", []float64{-19.410610199, -19.4104385376, 52.9046630859, 52.9050064087}},
{"kkbcqqfcsz", []float64{-18.0241495371, -18.0241441727, 12.5833261013, 12.5833368301}},
{"866rppwznm", []float64{13.9291459322, 13.9291512966, -165.268782377, -165.268771648}},
{"96wj53wczp0c", []float64{14.9499841221, 14.9499842897, -115.160106607, -115.160106272}},
{"9ctzrj", []float64{9.73937988281, 9.74487304688, -92.8564453125, -92.8454589844}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"wfq5hd", []float64{13.1945800781, 13.2000732422, 132.385253906, 132.396240234}},
{"9y6vu2v8wm5", []float64{36.1712247133, 36.1712260544, -97.1882195771, -97.188218236}},
{"6xcpg", []float64{-0.0439453125, 0.0, -65.9619140625, -65.91796875}},
{"rxqgqmc", []float64{-3.61587524414, -3.61450195312, 167.268218994, 167.269592285}},
{"yye", []float64{81.5625, 82.96875, 127.96875, 129.375}},
{"r3", []float64{-39.375, -33.75, 146.25, 157.5}},
{"x7t", []float64{19.6875, 21.09375, 153.28125, 154.6875}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"g9mmh9sku8h", []float64{52.9192113876, 52.9192127287, -14.9133986235, -14.9133972824}},
{"v2q6qu3", []float64{46.8251037598, 46.8264770508, 65.3370666504, 65.3384399414}},
{"7j9ckmu00j", []float64{-13.8111609221, -13.8111555576, -42.3468017578, -42.346791029}},
{"4q3td", []float64{-53.876953125, -53.8330078125, -76.552734375, -76.5087890625}},
{"9ve2c92z33ke", []float64{31.077454146, 31.0774543136, -96.6126798838, -96.6126795486}},
{"9sscvm0mw1kw", []float64{25.6485348567, 25.6485350244, -105.58899276, -105.588992424}},
{"9u", []float64{22.5, 28.125, -101.25, -90.0}},
{"nv4j7yb8mjjy", []float64{-60.9149988368, -60.9149986692, 126.728203855, 126.728204191}},
{"80w8", []float64{2.8125, 2.98828125, -170.859375, -170.5078125}},
{"q06ch78z1", []float64{-43.3975410461, -43.3974981308, 94.0550279617, 94.0550708771}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"gzw", []float64{87.1875, 88.59375, -2.8125, -1.40625}},
{"1", []float64{-90.0, -45.0, -135.0, -90.0}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"furb", []float64{68.90625, 69.08203125, -45.3515625, -45.0}},
{"xen8pyc36", []float64{16.9122934341, 16.9123363495, 166.983003616, 166.983046532}},
{"n1gc29sd", []float64{-79.9279403687, -79.9277687073, 95.3015899658, 95.3019332886}},
{"cjvu", []float64{78.046875, 78.22265625, -126.9140625, -126.5625}},
{"w7x53f7fb0", []float64{20.2716207504, 20.2716261148, 111.175804138, 111.175814867}},
{"hz", []float64{-50.625, -45.0, 33.75, 45.0}},
{"8tz14", []float64{32.51953125, 32.5634765625, -147.568359375, -147.524414062}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"pyfunm", []float64{-51.3006591797, -51.2951660156, 172.891845703, 172.902832031}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"7xre21ceuxv", []float64{-3.63716259599, -3.63716125488, -11.9508652389, -11.9508638978}},
{"17", []float64{-73.125, -67.5, -123.75, -112.5}},
{"ru", []float64{-22.5, -16.875, 168.75, 180.0}},
{"bdjs6y77m", []float64{57.0319604874, 57.0320034027, -149.640097618, -149.640054703}},
{"u1vsgx2", []float64{55.718536377, 55.719909668, 7.88818359375, 7.88955688477}},
{"pj5e80uz", []float64{-61.2544441223, -61.2542724609, 139.928398132, 139.928741455}},
{"ju01xcg9", []float64{-67.2265434265, -67.2263717651, 79.0953826904, 79.0957260132}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"k05kz1", []float64{-44.1595458984, -44.1540527344, 4.8779296875, 4.88891601562}},
{"ru2h1xc", []float64{-20.3480529785, -20.3466796875, 168.81729126, 168.818664551}},
{"ud2s6zr", []float64{58.443145752, 58.444519043, 23.3335876465, 23.3349609375}},
{"mrq5fm1zrqp", []float64{-3.5308277607, -3.53082641959, 64.7891007364, 64.7891020775}},
{"0y2u0dg2rg", []float64{-54.1254597902, -54.1254544258, -145.168544054, -145.168533325}},
{"9nkt0rnyx3", []float64{36.0747295618, 36.0747349262, -128.651307821, -128.651297092}},
{"77q", []float64{-26.71875, -25.3125, -25.3125, -23.90625}},
{"ng76t7", []float64{-71.2628173828, -71.2573242188, 128.551025391, 128.562011719}},
{"4ewypr0y27up", []float64{-69.2182661779, -69.2182660103, -57.6881629229, -57.6881625876}},
{"ge7vkm2x73", []float64{64.2341905832, 64.2341959476, -17.0389688015, -17.0389580727}},
{"3qm4d", []float64{-9.404296875, -9.3603515625, -116.630859375, -116.586914062}},
{"6gqw9", []float64{-25.576171875, -25.5322265625, -47.0654296875, -47.021484375}},
{"32", []float64{-45.0, -39.375, -123.75, -112.5}},
{"ns85", []float64{-64.16015625, -63.984375, 112.5, 112.8515625}},
{"hzy00b4j5tcj", []float64{-46.4053600095, -46.4053598419, 42.2233571112, 42.2233574465}},
{"7qrk5nt", []float64{-9.10491943359, -9.10354614258, -23.4159851074, -23.4146118164}},
{"vd4219t7th", []float64{56.2588620186, 56.258867383, 70.7374048233, 70.7374155521}},
{"g", []float64{45.0, 90.0, -45.0, 0.0}},
{"pq1p16t", []float64{-55.0057983398, -55.0044250488, 147.718048096, 147.719421387}},
{"gryfsc3wgn", []float64{89.0412604809, 89.0412658453, -24.0468835831, -24.0468728542}},
{"np0u", []float64{-49.921875, -49.74609375, 91.0546875, 91.40625}},
{"u87", []float64{46.40625, 47.8125, 26.71875, 28.125}},
{"9qz2", []float64{37.96875, 38.14453125, -113.5546875, -113.203125}},
{"xunf", []float64{22.8515625, 23.02734375, 178.2421875, 178.59375}},
{"ve", []float64{61.875, 67.5, 67.5, 78.75}},
{"s2c1tf2bvp4s", []float64{4.49494846165, 4.49494862929, 12.9101834446, 12.9101837799}},
{"5znd0f", []float64{-50.2624511719, -50.2569580078, -2.07641601562, -2.0654296875}},
{"dn90ug", []float64{36.7108154297, 36.7163085938, -88.3850097656, -88.3740234375}},
{"24bg3", []float64{-28.9599609375, -28.916015625, -178.901367188, -178.857421875}},
{"x46xpb2", []float64{13.888092041, 13.889465332, 138.856201172, 138.857574463}},
{"83q", []float64{7.03125, 8.4375, -160.3125, -158.90625}},
{"2pup20sqj", []float64{-0.128059387207, -0.128016471863, -174.368948936, -174.368906021}},
{"07", []float64{-73.125, -67.5, -168.75, -157.5}},
{"jj7nh21g4zk", []float64{-59.4135086238, -59.4135072827, 49.408044219, 49.4080455601}},
{"19up4rw9", []float64{-78.8844108582, -78.8842391968, -106.767196655, -106.766853333}},
{"2j", []float64{-16.875, -11.25, -180.0, -168.75}},
{"14uexty", []float64{-73.8844299316, -73.8830566406, -128.33404541, -128.332672119}},
{"t3wtb", []float64{9.4482421875, 9.4921875, 65.390625, 65.4345703125}},
{"wv0z", []float64{29.35546875, 29.53125, 124.8046875, 125.15625}},
{"jj6gcte19u", []float64{-59.7790789604, -59.779073596, 48.9373004436, 48.9373111725}},
{"xz0wc0te1bbe", []float64{40.5647895299, 40.5647896975, 169.504699185, 169.504699521}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"zyejp1um86c", []float64{82.4519781768, 82.4519795179, 173.282215744, 173.282217085}},
{"ft915xruxj8n", []float64{76.1539096758, 76.1539098434, -65.9289979935, -65.9289976582}},
{"vchvx0yp5c", []float64{51.5971237421, 51.5971291065, 85.7457053661, 85.745716095}},
{"x5x", []float64{19.6875, 21.09375, 144.84375, 146.25}},
{"0ykju58", []float64{-53.8137817383, -53.8124084473, -140.44921875, -140.447845459}},
{"d2yk35fd", []float64{4.98676300049, 4.98693466187, -69.91355896, -69.9132156372}},
{"6ymr7k8", []float64{-8.54461669922, -8.5432434082, -48.7243652344, -48.7229919434}},
{"pjsxb9f", []float64{-57.6905822754, -57.6892089844, 141.352844238, 141.354217529}},
{"trydkh27gn2t", []float64{44.0132818557, 44.0132820234, 65.5668789893, 65.5668793246}},
{"k72c0uqqw2", []float64{-26.5185070038, -26.5185016394, 12.3464977741, 12.346508503}},
{"s8trjfz", []float64{4.05807495117, 4.05944824219, 30.145111084, 30.146484375}},
{"57ffkwfnrh1", []float64{-68.4725689888, -68.4725676477, -29.6820102632, -29.6820089221}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"u2k", []float64{46.40625, 47.8125, 16.875, 18.28125}},
{"nndqt", []float64{-52.294921875, -52.2509765625, 93.3837890625, 93.427734375}},
{"w7", []float64{16.875, 22.5, 101.25, 112.5}},
{"r6c7x00p6", []float64{-28.91477108, -28.9147281647, 148.315515518, 148.315558434}},
{"mdrgz", []float64{-31.6845703125, -31.640625, 78.7060546875, 78.75}},
{"f1dzhud0j", []float64{54.6926879883, 54.6927309036, -85.9211111069, -85.9210681915}},
{"yqh", []float64{78.75, 80.15625, 106.875, 108.28125}},
{"9jp43kj", []float64{28.5424804688, 28.5438537598, -125.094451904, -125.093078613}},
{"14pb4v5q", []float64{-78.7215042114, -78.72133255, -123.976249695, -123.975906372}},
{"bjzkjzr9hsvc", []float64{78.0868977495, 78.0868979171, -169.54150144, -169.541501105}},
{"svjbhs9e9g8", []float64{28.1503388286, 28.1503401697, 42.0358264446, 42.0358277857}},
{"guuxc8c5v", []float64{73.0858182907, 73.0858612061, -4.85436916351, -4.85432624817}},
{"utu2603h0ru5", []float64{77.3897973262, 77.3897974938, 28.5658425093, 28.5658428445}},
{"bq", []float64{78.75, 84.375, -168.75, -157.5}},
{"kk", []float64{-22.5, -16.875, 11.25, 22.5}},
{"6vxq65mhx", []float64{-12.9452419281, -12.9451990128, -45.9596300125, -45.9595870972}},
{"f4sb", []float64{59.0625, 59.23828125, -83.3203125, -82.96875}},
{"y5p4", []float64{62.2265625, 62.40234375, 99.84375, 100.1953125}},
{"bs6cju", []float64{69.1040039062, 69.1094970703, -153.380126953, -153.369140625}},
{"5j", []float64{-61.875, -56.25, -45.0, -33.75}},
{"4e8z0qpsppx", []float64{-69.048345387, -69.0483440459, -66.4237166941, -66.423715353}},
{"nbyg", []float64{-85.25390625, -85.078125, 133.2421875, 133.59375}},
{"8jn2dnxvbd", []float64{28.2495939732, 28.2495993376, -171.112382412, -171.112371683}},
{"0h6ej9g", []float64{-65.5567932129, -65.5554199219, -176.238555908, -176.237182617}},
{"9j18njf9mc", []float64{28.1568056345, 28.1568109989, -132.623273134, -132.623262405}},
{"pf93jtqd2xm9", []float64{-75.7324543409, -75.7324541733, 170.758466944, 170.758467279}},
{"hc2", []float64{-82.96875, -81.5625, 33.75, 35.15625}},
{"g", []float64{45.0, 90.0, -45.0, 0.0}},
{"cewhub", []float64{65.5224609375, 65.5279541016, -103.853759766, -103.842773438}},
{"2vcrfbgsv2sx", []float64{-11.2890061922, -11.2890060246, -144.366300032, -144.366299696}},
{"mxdsnv", []float64{-2.08190917969, -2.07641601562, 71.3122558594, 71.3232421875}},
{"03", []float64{-84.375, -78.75, -168.75, -157.5}},
{"u73kybuxbq", []float64{64.1216933727, 64.1216987371, 13.3106338978, 13.3106446266}},
{"uz2", []float64{85.78125, 87.1875, 33.75, 35.15625}},
{"3w", []float64{-11.25, -5.625, -112.5, -101.25}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"q8kttcg9t", []float64{-42.6170825958, -42.6170396805, 119.085831642, 119.085874557}},
{"j8w8vhmh9d", []float64{-87.0315349102, -87.0315295458, 76.8672823906, 76.8672931194}},
{"qxn54fn91g", []float64{-5.08648216724, -5.08647680283, 121.067351103, 121.067361832}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"3h2p4vmu", []float64{-19.8337554932, -19.8335838318, -134.871253967, -134.870910645}},
{"j4re6xcw", []float64{-76.7288589478, -76.7286872864, 55.6587982178, 55.6591415405}},
{"ugkmrc1vj", []float64{64.2104530334, 64.2104959488, 40.0697565079, 40.0697994232}},
{"n1mj98dq", []float64{-81.9981765747, -81.9980049133, 97.1002578735, 97.1006011963}},
{"r6c", []float64{-29.53125, -28.125, 147.65625, 149.0625}},
{"9uksmr", []float64{24.6917724609, 24.697265625, -94.6911621094, -94.6801757812}},
{"dmve", []float64{32.87109375, 33.046875, -71.015625, -70.6640625}},
{"jgrdkvuq", []float64{-71.2906265259, -71.2904548645, 89.5114517212, 89.5117950439}},
{"94g", []float64{15.46875, 16.875, -130.78125, -129.375}},
{"2vj4gt", []float64{-16.3641357422, -16.3586425781, -139.064941406, -139.053955078}},
{"q39q2pm5kxt", []float64{-35.4234436154, -35.4234422743, 103.01487878, 103.014880121}},
{"qcuy493hpwdf", []float64{-34.0939741954, -34.0939740278, 130.541249625, 130.541249961}},
{"nhfpjqpyqnv3", []float64{-62.0167130046, -62.0167128369, 93.0541204289, 93.0541207641}},
{"838kk", []float64{9.1845703125, 9.228515625, -168.22265625, -168.178710938}},
{"zx2fdg", []float64{86.2371826172, 86.2426757812, 158.675537109, 158.686523438}},
{"j7ktd1g4", []float64{-70.7419967651, -70.7418251038, 62.670135498, 62.6704788208}},
{"yzp", []float64{84.375, 85.78125, 133.59375, 135.0}},
{"76kf7tcsnb94", []float64{-31.9159668311, -31.9159666635, -26.91415295, -26.9141526148}},
{"9jb", []float64{32.34375, 33.75, -135.0, -133.59375}},
{"6w", []float64{-11.25, -5.625, -67.5, -56.25}},
{"f2zs58", []float64{49.921875, 49.9273681641, -68.0493164062, -68.0383300781}},
{"f0", []float64{45.0, 50.625, -90.0, -78.75}},
{"mnqum74bk", []float64{-9.08015727997, -9.08011436462, 54.7268486023, 54.7268915176}},
{"t6rhggbn5", []float64{13.512840271, 13.5128831863, 66.2586736679, 66.2587165833}},
{"g9q", []float64{52.03125, 53.4375, -14.0625, -12.65625}},
{"7vr", []float64{-15.46875, -14.0625, -1.40625, 0.0}},
{"t6sr47h", []float64{15.3094482422, 15.3108215332, 62.3309326172, 62.3323059082}},
{"076vzrw", []float64{-70.666809082, -70.665435791, -164.555969238, -164.554595947}},
{"s2", []float64{0.0, 5.625, 11.25, 22.5}},
{"gd7350xxrrs", []float64{57.8360626101, 57.8360639513, -17.7872353792, -17.7872340381}},
{"0p8sgbg", []float64{-46.9734191895, -46.9720458984, -179.127960205, -179.126586914}},
{"5zsn39", []float64{-46.7083740234, -46.7028808594, -5.55908203125, -5.54809570312}},
{"80f", []float64{4.21875, 5.625, -177.1875, -175.78125}},
{"cymr4xm05c0", []float64{81.4265495539, 81.426550895, -93.7502968311, -93.75029549}},
{"qmjmz", []float64{-15.8642578125, -15.8203125, 108.940429688, 108.984375}},
{"39c0", []float64{-35.15625, -34.98046875, -111.09375, -110.7421875}},
{"pgxnue0jpfn", []float64{-69.1086280346, -69.1086266935, 178.791844547, 178.791845888}},
{"nytxjp", []float64{-52.1685791016, -52.1630859375, 131.704101562, 131.715087891}},
{"q1mvpgze", []float64{-37.0687294006, -37.0685577393, 98.4368133545, 98.4371566772}},
{"tqjgn4h", []float64{34.2883300781, 34.2897033691, 64.6051025391, 64.6064758301}},
{"tjn18qrtdk", []float64{28.4239697456, 28.4239751101, 53.4588825703, 53.4588932991}},
{"qq3w12r", []float64{-8.78768920898, -8.78631591797, 103.423919678, 103.425292969}},
{"zzdu8g6", []float64{87.9963684082, 87.9977416992, 172.652893066, 172.654266357}},
{"mz72xeccms3t", []float64{-4.11002179608, -4.11002162844, 83.6525436491, 83.6525439844}},
{"s7fnw5nzhbs4", []float64{22.2540122643, 22.2540124319, 14.3356508017, 14.3356511369}},
{"76rtxb6nkdm", []float64{-31.3744948804, -31.3744935393, -22.8596024215, -22.8596010804}},
{"znbejer93dr", []float64{83.5141731799, 83.514174521, 135.955197662, 135.955199003}},
{"smk7u7bz27", []float64{30.212289691, 30.2122950554, 17.4143707752, 17.4143815041}},
{"yvmurs", []float64{75.3002929688, 75.3057861328, 132.165527344, 132.176513672}},
{"tnjn6dgd8", []float64{34.8641681671, 34.8642110825, 52.1459197998, 52.1459627151}},
{"gyee1hnu", []float64{82.1125030518, 82.1126747131, -6.27490997314, -6.27456665039}},
{"gjwh9n02wfmc", []float64{76.7615726776, 76.7615728453, -36.5179139748, -36.5179136395}},
{"n6jh51hrnfws", []float64{-78.0401661247, -78.0401659571, 108.41922082, 108.419221155}},
{"shgszu46pudj", []float64{27.5760518946, 27.5760520622, 5.26587635279, 5.26587668806}},
{"3c", []float64{-39.375, -33.75, -101.25, -90.0}},
{"fy", []float64{78.75, 84.375, -56.25, -45.0}},
{"s75d5tn3m", []float64{17.254242897, 17.2542858124, 16.3344812393, 16.3345241547}},
{"2c1c9kkw8dk5", []float64{-39.0868538059, -39.0868536383, -143.727924228, -143.727923892}},
{"5yugurey8e2", []float64{-51.3297383487, -51.3297370076, -4.37837362289, -4.37837228179}},
{"rd", []float64{-33.75, -28.125, 157.5, 168.75}},
{"um", []float64{73.125, 78.75, 11.25, 22.5}},
{"bkgc", []float64{71.89453125, 72.0703125, -163.4765625, -163.125}},
{"8cfxnrphhu0", []float64{11.1133790016, 11.1133803427, -142.449899912, -142.449898571}},
{"tdm", []float64{12.65625, 14.0625, 74.53125, 75.9375}},
{"y9usehucn02q", []float64{55.6610321626, 55.6610323302, 118.966741897, 118.966742232}},
{"vk7kbfk0vf33", []float64{69.7537115403, 69.7537117079, 60.859013088, 60.8590134233}},
{"ewyx82", []float64{39.287109375, 39.2926025391, -13.3483886719, -13.3374023438}},
{"ev8c3", []float64{31.1572265625, 31.201171875, -10.1513671875, -10.107421875}},
{"37p4fhuc", []float64{-27.6153373718, -27.6151657104, -113.811836243, -113.81149292}},
{"0yvh2p9wbu", []float64{-51.2418007851, -51.2417954206, -139.216657877, -139.216647148}},
{"81k", []float64{7.03125, 8.4375, -174.375, -172.96875}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"5c67rs", []float64{-82.3754882812, -82.3699951172, -7.75634765625, -7.74536132812}},
{"udjvtg", []float64{57.2332763672, 57.2387695312, 30.8386230469, 30.849609375}},
{"8b56vfqrppu", []float64{0.497001260519, 0.497002601624, -141.418113112, -141.418111771}},
{"xv50", []float64{28.125, 28.30078125, 172.96875, 173.3203125}},
{"7ep", []float64{-28.125, -26.71875, -12.65625, -11.25}},
{"bxzp4746", []float64{89.8410415649, 89.8412132263, -147.554283142, -147.553939819}},
{"5d54r", []float64{-78.3544921875, -78.310546875, -17.9736328125, -17.9296875}},
{"hhknsytff", []float64{-64.9149942398, -64.9149513245, 5.8417224884, 5.84176540375}},
{"gvjjq75", []float64{74.0643310547, 74.0657043457, -3.93997192383, -3.93859863281}},
{"6ryrt5", []float64{-0.0714111328125, -0.06591796875, -69.7412109375, -69.7302246094}},
{"tykj1vq1gj", []float64{36.0643225908, 36.0643279552, 84.460272789, 84.4602835178}},
{"20tdw84b1w", []float64{-41.7480146885, -41.7480093241, -171.976139545, -171.976128817}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"sbwdq1c4355", []float64{3.21802318096, 3.21802452207, 43.1557171047, 43.1557184458}},
{"rwhkzg", []float64{-10.3985595703, -10.3930664062, 163.817138672, 163.828125}},
{"wrzphwjw57", []float64{44.8582237959, 44.8582291603, 111.299196482, 111.299207211}},
{"674", []float64{-28.125, -26.71875, -75.9375, -74.53125}},
{"z8kb", []float64{46.40625, 46.58203125, 164.1796875, 164.53125}},
{"pmudq9vs33", []float64{-57.2503942251, -57.2503888607, 152.871376276, 152.871387005}},
{"j1br4n9", []float64{-78.8900756836, -78.8887023926, 45.440826416, 45.442199707}},
{"ccc", []float64{54.84375, 56.25, -99.84375, -98.4375}},
{"src", []float64{43.59375, 45.0, 12.65625, 14.0625}},
{"cc51keq", []float64{50.8625793457, 50.8639526367, -96.8252563477, -96.8238830566}},
{"pr", []float64{-50.625, -45.0, 146.25, 157.5}},
{"tvd9", []float64{31.11328125, 31.2890625, 82.265625, 82.6171875}},
{"489bdms44x", []float64{-87.069016099, -87.0690107346, -64.9345850945, -64.9345743656}},
{"cmn23svsyv", []float64{73.1958800554, 73.1958854198, -114.887176752, -114.887166023}},
{"dm9vug1194", []float64{31.9649899006, 31.964995265, -76.0789060593, -76.0788953304}},
{"45f7", []float64{-68.37890625, -68.203125, -86.8359375, -86.484375}},
{"mxuxkdtgy50", []float64{-0.117443203926, -0.117441862822, 74.0340328217, 74.0340341628}},
{"tdwd7", []float64{14.4580078125, 14.501953125, 76.7724609375, 76.81640625}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"jss4y", []float64{-64.2041015625, -64.16015625, 73.388671875, 73.4326171875}},
{"tmcs", []float64{33.046875, 33.22265625, 58.359375, 58.7109375}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"gc61ncnxuec", []float64{52.2138749063, 52.2138762474, -8.13174828887, -8.13174694777}},
{"jwpc", []float64{-56.07421875, -55.8984375, 78.3984375, 78.75}},
{"yq5gn", []float64{79.27734375, 79.3212890625, 106.787109375, 106.831054688}},
{"uhtmex", []float64{71.3177490234, 71.3232421875, 7.53662109375, 7.54760742188}},
{"n0hbvyc", []float64{-89.8310852051, -89.8297119141, 96.9337463379, 96.9351196289}},
{"kp8q0gk0h", []float64{-1.7399597168, -1.73991680145, 0.390186309814, 0.390229225159}},
{"yjzduj46p", []float64{77.8549575806, 77.8550004959, 100.726046562, 100.726089478}},
{"8hhkjyy4v5s", []float64{23.2406947017, 23.2406960428, -173.762292266, -173.762290925}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"wwu63", []float64{38.3642578125, 38.408203125, 118.520507812, 118.564453125}},
{"z9rnym", []float64{53.2452392578, 53.2507324219, 167.618408203, 167.629394531}},
{"78fnfc", []float64{-39.5892333984, -39.5837402344, -19.5666503906, -19.5556640625}},
{"8dc1mqyer", []float64{15.7261133194, 15.7261562347, -155.85381031, -155.853767395}},
{"b5", []float64{61.875, 67.5, -180.0, -168.75}},
{"q3zq3gz6w7", []float64{-34.0365725756, -34.0365672112, 111.532441378, 111.532452106}},
{"xx6", []float64{40.78125, 42.1875, 160.3125, 161.71875}},
{"r3", []float64{-39.375, -33.75, 146.25, 157.5}},
{"dytz0swq74e", []float64{37.8187742829, 37.818775624, -48.1333740056, -48.1333726645}},
{"gpwu", []float64{87.890625, 88.06640625, -35.5078125, -35.15625}},
{"9ywdf6cr2w7", []float64{37.0622827113, 37.0622840524, -92.0087559521, -92.008754611}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"ues", []float64{64.6875, 66.09375, 28.125, 29.53125}},
{"qggvs5q7pr", []float64{-22.9210478067, -22.9210424423, 129.208112955, 129.208123684}},
{"fy9d8y2mv2", []float64{82.0372724533, 82.0372778177, -54.1070973873, -54.1070866585}},
{"bxx92bb60s", []float64{87.411711216, 87.4117165804, -146.919801235, -146.919790506}},
{"uugkmp4jkm0", []float64{72.5052005053, 72.5052018464, 38.5429680347, 38.5429693758}},
{"7mmd13sd30", []float64{-15.1085615158, -15.1085561514, -25.9544706345, -25.9544599056}},
{"5nn2", []float64{-56.25, -56.07421875, -36.2109375, -35.859375}},
{"jf9sz2r", []float64{-75.1011657715, -75.0997924805, 81.1875915527, 81.1889648438}},
{"3r", []float64{-5.625, 0.0, -123.75, -112.5}},
{"yw", []float64{78.75, 84.375, 112.5, 123.75}},
{"yt31y5hwc3c5", []float64{74.8565152846, 74.8565154523, 114.17615667, 114.176157005}},
{"7vgv9xhmn", []float64{-11.6501426697, -11.6500997543, -5.90455055237, -5.90450763702}},
{"f10tgm4q6", []float64{51.6642808914, 51.6643238068, -89.1508769989, -89.1508340836}},
{"hepj84tfcj", []float64{-72.143971324, -72.1439659595, 32.3516893387, 32.3517000675}},
{"zg", []float64{61.875, 67.5, 168.75, 180.0}},
{"by12", []float64{78.75, 78.92578125, -144.4921875, -144.140625}},
{"51", []float64{-84.375, -78.75, -45.0, -33.75}},
{"w44s78ur", []float64{12.0023918152, 12.0025634766, 93.6752700806, 93.6756134033}},
{"2tcr0", []float64{-11.42578125, -11.3818359375, -155.7421875, -155.698242188}},
{"p0n", []float64{-90.0, -88.59375, 143.4375, 144.84375}},
{"u1", []float64{50.625, 56.25, 0.0, 11.25}},
{"nygu1mshqxku", []float64{-51.2971434742, -51.2971433066, 129.084147625, 129.08414796}},
{"3khrs6gty5", []float64{-21.1655312777, -21.1655259132, -117.581605911, -117.581595182}},
{"4n", []float64{-56.25, -50.625, -90.0, -78.75}},
{"tj8pjxb5", []float64{32.2110557556, 32.211227417, 45.2416992188, 45.2420425415}},
{"7nhpg3stdjgu", []float64{-9.87847991288, -9.87847974524, -39.225907065, -39.2259067297}},
{"rhtcg24t2s50", []float64{-19.3789601326, -19.378959965, 143.232218474, 143.232218809}},
{"5vx7c75x", []float64{-58.3856391907, -58.3854675293, -0.99494934082, -0.994606018066}},
{"cs4kgc65z", []float64{68.3424711227, 68.3425140381, -109.168095589, -109.168052673}},
{"k3sn2mb", []float64{-35.4322814941, -35.4309082031, 16.8859863281, 16.8873596191}},
{"ud7s8v", []float64{58.4747314453, 58.4802246094, 27.4548339844, 27.4658203125}},
{"8qqg2ue1mr6b", []float64{35.7525117695, 35.7525119372, -159.220504649, -159.220504314}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"kwtg55d3me", []float64{-7.89069950581, -7.89069414139, 30.7210993767, 30.7211101055}},
{"mqsedtkq3", []float64{-7.79235363007, -7.79231071472, 62.6938676834, 62.6939105988}},
{"94j6tg", []float64{11.7059326172, 11.7114257812, -127.364501953, -127.353515625}},
{"wd45s", []float64{11.865234375, 11.9091796875, 115.48828125, 115.532226562}},
{"fwgxjq0n2", []float64{84.233250618, 84.2332935333, -62.3474121094, -62.347369194}},
{"1k8fh1", []float64{-64.3304443359, -64.3249511719, -122.51953125, -122.508544922}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"qh", []float64{-22.5, -16.875, 90.0, 101.25}},
{"24nszw5", []float64{-32.8820800781, -32.8807067871, -170.525665283, -170.524291992}},
{"p8rfy", []float64{-88.1103515625, -88.06640625, 168.662109375, 168.706054688}},
{"st23m", []float64{29.7509765625, 29.794921875, 23.0712890625, 23.115234375}},
{"zgg3", []float64{66.26953125, 66.4453125, 173.3203125, 173.671875}},
{"zgsgk7bcz8k", []float64{65.2796901762, 65.2796915174, 175.617812276, 175.617813617}},
{"js0", []float64{-67.5, -66.09375, 67.5, 68.90625}},
{"9zs3bh", []float64{42.5170898438, 42.5225830078, -95.2734375, -95.2624511719}},
{"k8qd0d6mqfz", []float64{-43.2289119065, -43.2289105654, 31.6659866273, 31.6659879684}},
{"xrj", []float64{39.375, 40.78125, 153.28125, 154.6875}},
{"0mbxv2r4", []float64{-56.2922286987, -56.2920570374, -167.806549072, -167.80620575}},
{"bdf8wz8g1", []float64{60.5983543396, 60.5983972549, -153.686671257, -153.686628342}},
{"emgeh3mkyu", []float64{32.8787970543, 32.8788024187, -28.6338579655, -28.6338472366}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"156p3nxfwq", []float64{-70.4081690311, -70.4081636667, -132.132643461, -132.132632732}},
{"stduvpcnp4", []float64{31.8160736561, 31.8160790205, 26.5885877609, 26.5885984898}},
{"shk8evrmmbgr", []float64{24.0238861553, 24.023886323, 6.50312740356, 6.50312773883}},
{"7ynw", []float64{-10.1953125, -10.01953125, -2.109375, -1.7578125}},
{"r4zgvkp", []float64{-28.8500976562, -28.8487243652, 146.138763428, 146.140136719}},
{"5b9p", []float64{-85.95703125, -85.78125, -9.84375, -9.4921875}},
{"gsp", []float64{67.5, 68.90625, -12.65625, -11.25}},
{"ek4mmvr", []float64{23.4516906738, 23.4530639648, -30.323638916, -30.322265625}},
{"hd2nu", []float64{-76.1572265625, -76.11328125, 22.67578125, 22.7197265625}},
{"xzp0t", []float64{39.462890625, 39.5068359375, 178.813476562, 178.857421875}},
{"fycjx9", []float64{83.9410400391, 83.9465332031, -54.5141601562, -54.5031738281}},
{"x4ygy62x5", []float64{16.1414909363, 16.1415338516, 144.767661095, 144.76770401}},
{"37", []float64{-28.125, -22.5, -123.75, -112.5}},
{"m40", []float64{-33.75, -32.34375, 45.0, 46.40625}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"b5", []float64{61.875, 67.5, -180.0, -168.75}},
{"zwd", []float64{81.5625, 82.96875, 160.3125, 161.71875}},
{"qgnb2g", []float64{-28.0645751953, -28.0590820312, 133.275146484, 133.286132812}},
{"7w", []float64{-11.25, -5.625, -22.5, -11.25}},
{"v6x", []float64{59.0625, 60.46875, 66.09375, 67.5}},
{"018v8j6y", []float64{-80.5658340454, -80.565662384, -178.94153595, -178.941192627}},
{"mm4p02h18xc", []float64{-15.6442321837, -15.6442308426, 59.079002291, 59.0790036321}},
{"6cty1v7", []float64{-35.4789733887, -35.4776000977, -48.0830383301, -48.0816650391}},
{"g6rzs", []float64{58.974609375, 59.0185546875, -22.67578125, -22.6318359375}},
{"58qb", []float64{-88.59375, -88.41796875, -13.0078125, -12.65625}},
{"n8v", []float64{-85.78125, -84.375, 119.53125, 120.9375}},
{"h1nj4b1uv", []float64{-83.4952783585, -83.4952354431, 8.56096744537, 8.56101036072}},
{"qt4ukphd", []float64{-16.0891342163, -16.0889625549, 116.54914856, 116.549491882}},
{"n2", []float64{-90.0, -84.375, 101.25, 112.5}},
{"50sux01hejpj", []float64{-86.3956842385, -86.3956840709, -38.0111838877, -38.0111835524}},
{"4exy", []float64{-69.2578125, -69.08203125, -56.6015625, -56.25}},
{"x9t8r", []float64{8.4814453125, 8.525390625, 165.541992188, 165.5859375}},
{"785m2wrxgw2", []float64{-44.0414522588, -44.0414509177, -17.8972649574, -17.8972636163}},
{"0exegdddqf", []float64{-69.6391904354, -69.639185071, -146.7955935, -146.795582771}},
{"ynrw", []float64{81.2109375, 81.38671875, 100.546875, 100.8984375}},
{"uqdmuzrz6", []float64{82.6143121719, 82.6143550873, 14.6335315704, 14.6335744858}},
{"gchp3yu15c", []float64{51.9366699457, 51.9366753101, -5.54244160652, -5.54243087769}},
{"415qzwpj2r", []float64{-83.154578805, -83.1545734406, -85.0904738903, -85.0904631615}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"vzq07z4", []float64{85.8636474609, 85.865020752, 87.3550415039, 87.3564147949}},
{"w43s", []float64{13.359375, 13.53515625, 92.109375, 92.4609375}},
{"b7td", []float64{65.0390625, 65.21484375, -161.015625, -160.6640625}},
{"2fpe", []float64{-33.22265625, -33.046875, -135.703125, -135.3515625}},
{"nyt", []float64{-53.4375, -52.03125, 130.78125, 132.1875}},
{"g", []float64{45.0, 90.0, -45.0, 0.0}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"dkzk", []float64{27.421875, 27.59765625, -68.5546875, -68.203125}},
{"r3925m207w", []float64{-36.5335857868, -36.5335804224, 148.150784969, 148.150795698}},
{"0zy5gndwbzt3", []float64{-45.710165631, -45.7101654634, -137.677191608, -137.677191272}},
{"46ex1dmu9", []float64{-74.6938991547, -74.6938562393, -73.7542676926, -73.7542247772}},
{"jf74h9rrvnz8", []float64{-76.9839544594, -76.9839542918, 83.1766849011, 83.1766852364}},
{"6vj1jtgv", []float64{-16.6667747498, -16.6666030884, -48.9719009399, -48.9715576172}},
{"ntsx", []float64{-57.83203125, -57.65625, 118.828125, 119.1796875}},
{"ehr3ejb", []float64{24.2015075684, 24.2028808594, -34.6728515625, -34.6714782715}},
{"p7", []float64{-73.125, -67.5, 146.25, 157.5}},
{"re7", []float64{-26.71875, -25.3125, 161.71875, 163.125}},
{"66x6j7sc0t", []float64{-30.5665129423, -30.5665075779, -68.3174300194, -68.3174192905}},
{"mywcjb2q", []float64{-8.25931549072, -8.25914382935, 88.4952163696, 88.4955596924}},
{"f88jrw", []float64{48.7683105469, 48.7738037109, -67.1704101562, -67.1594238281}},
{"bjty3ef9n3y6", []float64{77.0569135621, 77.0569137298, -171.844434701, -171.844434366}},
{"jhz66rh4fj7s", []float64{-62.8467891365, -62.8467889689, 55.2997731417, 55.299773477}},
{"u16r3nm", []float64{53.3399963379, 53.3413696289, 3.21487426758, 3.21624755859}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"q5rwnj4", []float64{-25.6365966797, -25.6352233887, 100.813293457, 100.814666748}},
{"dsqd4vc85c", []float64{24.2894035578, 24.2894089222, -58.2363045216, -58.2362937927}},
{"bzpu7z2qxf9", []float64{85.1630249619, 85.1630263031, -135.18609032, -135.186088979}},
{"e805cvqt", []float64{0.688877105713, 0.68904876709, -22.4141693115, -22.4138259888}},
{"y9su01vg", []float64{54.1507530212, 54.1509246826, 119.187583923, 119.187927246}},
{"41", []float64{-84.375, -78.75, -90.0, -78.75}},
{"vu3f", []float64{69.2578125, 69.43359375, 81.2109375, 81.5625}},
{"86", []float64{11.25, 16.875, -168.75, -157.5}},
{"wpuksnn65", []float64{44.4180679321, 44.4181108475, 96.1610555649, 96.1610984802}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"nrqk483fq0kp", []float64{-48.5138629563, -48.5138627887, 110.151591897, 110.151592232}},
{"cg0mc", []float64{62.8857421875, 62.9296875, -100.854492188, -100.810546875}},
{"myt75g", []float64{-7.89367675781, -7.88818359375, 86.2976074219, 86.30859375}},
{"ugxe27b", []float64{65.2793884277, 65.2807617188, 44.3078613281, 44.3092346191}},
{"wd845dtbhs8", []float64{14.42781955, 14.4278208911, 112.661898136, 112.661899477}},
{"c8ef9h6mr", []float64{48.2762002945, 48.2762432098, -107.179226875, -107.17918396}},
{"bx", []float64{84.375, 90.0, -157.5, -146.25}},
{"qduv3", []float64{-28.6083984375, -28.564453125, 119.223632812, 119.267578125}},
{"j86depxm", []float64{-88.1122398376, -88.1120681763, 71.1574172974, 71.1577606201}},
{"4semjs5hr9p", []float64{-63.7858861685, -63.7858848274, -62.6835371554, -62.6835358143}},
{"ee", []float64{16.875, 22.5, -22.5, -11.25}},
{"jxv25m96n", []float64{-46.3756942749, -46.3756513596, 75.0276088715, 75.0276517868}},
{"vx7gj0006xwe", []float64{86.3086774014, 86.308677569, 72.993280068, 72.9932804033}},
{"6c", []float64{-39.375, -33.75, -56.25, -45.0}},
{"ukstfgt78f", []float64{71.3430798054, 71.3430851698, 17.7062165737, 17.7062273026}},
{"g1h0ye", []float64{50.7733154297, 50.7788085938, -39.0893554688, -39.0783691406}},
{"5s3j5er", []float64{-65.1969909668, -65.1956176758, -20.9303283691, -20.9289550781}},
{"6yrnnwq1", []float64{-8.75455856323, -8.75438690186, -46.1123657227, -46.1120223999}},
{"20fjy", []float64{-39.7705078125, -39.7265625, -176.923828125, -176.879882812}},
{"1tt417q", []float64{-58.6930847168, -58.6917114258, -105.405578613, -105.404205322}},
{"hh68", []float64{-66.09375, -65.91796875, 3.515625, 3.8671875}},
{"85s823r7j", []float64{19.7388267517, 19.7388696671, -173.650717735, -173.65067482}},
{"3u6n", []float64{-20.0390625, -19.86328125, -98.4375, -98.0859375}},
{"7w5y8", []float64{-10.107421875, -10.0634765625, -17.2265625, -17.1826171875}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"zk8j", []float64{71.19140625, 71.3671875, 146.25, 146.6015625}},
{"mgm1evw0", []float64{-26.4248657227, -26.4246940613, 85.954284668, 85.9546279907}},
{"m7nzv", []float64{-26.7626953125, -26.71875, 65.9619140625, 66.005859375}},
{"ev89re48", []float64{31.1737060547, 31.1738777161, -10.2138519287, -10.213508606}},
{"dnc7mrxvb3", []float64{38.5822302103, 38.5822355747, -88.0008208752, -88.0008101463}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"wbksmz281", []float64{2.19314575195, 2.1931886673, 130.331540108, 130.331583023}},
{"0x3zqt", []float64{-47.9168701172, -47.9113769531, -154.753417969, -154.742431641}},
{"h97q", []float64{-81.9140625, -81.73828125, 27.0703125, 27.421875}},
{"ypjt38wtc", []float64{85.3015851974, 85.3016281128, 97.8092622757, 97.809305191}},
{"d3ey054b7p", []float64{9.50874745846, 9.50875282288, -73.4726572037, -73.4726464748}},
{"zpbkps3qd9r", []float64{89.3213434517, 89.3213447928, 135.682985634, 135.682986975}},
{"sqxhhhs68mxy", []float64{37.2908039019, 37.2908040695, 21.2753888592, 21.2753891945}},
{"293cyj4g6q", []float64{-37.6330769062, -37.6330715418, -154.771517515, -154.771506786}},
{"w3", []float64{5.625, 11.25, 101.25, 112.5}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"q69s", []float64{-30.234375, -30.05859375, 103.359375, 103.7109375}},
{"fy1qerq2ven", []float64{79.9325484037, 79.9325497448, -54.3405380845, -54.3405367434}},
{"62", []float64{-45.0, -39.375, -78.75, -67.5}},
{"yke1jyek0fg", []float64{70.5246882141, 70.5246895552, 105.725934952, 105.725936294}},
{"2rcbq1z5c", []float64{-1.35204792023, -1.35200500488, -166.015734673, -166.015691757}},
{"gue", []float64{70.3125, 71.71875, -7.03125, -5.625}},
{"t8kqv", []float64{2.5927734375, 2.63671875, 73.6962890625, 73.740234375}},
{"bgtecc1b", []float64{65.3521728516, 65.3523445129, -138.436317444, -138.435974121}},
{"8s550", []float64{23.02734375, 23.0712890625, -153.28125, -153.237304688}},
{"j655kpm1w0b", []float64{-78.1386239827, -78.1386226416, 60.6516551971, 60.6516565382}},
{"980h", []float64{0.703125, 0.87890625, -112.5, -112.1484375}},
{"ssywj", []float64{27.7734375, 27.8173828125, 31.8603515625, 31.904296875}},
{"hrvu", []float64{-45.703125, -45.52734375, 19.3359375, 19.6875}},
{"3ftuv", []float64{-30.1025390625, -30.05859375, -92.9443359375, -92.900390625}},
{"zcphg93jux", []float64{51.4678519964, 51.4678573608, 178.749125004, 178.749135733}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"vjxk", []float64{76.640625, 76.81640625, 55.1953125, 55.546875}},
{"t6cgynpnh4", []float64{16.161929369, 16.1619347334, 58.9843940735, 58.9844048023}},
{"jspc", []float64{-67.32421875, -67.1484375, 78.3984375, 78.75}},
{"6w7k2v", []float64{-9.06921386719, -9.06372070312, -62.8967285156, -62.8857421875}},
{"n6z4", []float64{-74.1796875, -74.00390625, 111.09375, 111.4453125}},
{"y507hx0", []float64{62.4407958984, 62.4421691895, 90.5493164062, 90.5506896973}},
{"z17p35rnsc6v", []float64{53.3246401884, 53.324640356, 139.272515886, 139.272516221}},
{"p8uj5760", []float64{-84.8844909668, -84.8843193054, 163.270568848, 163.27091217}},
{"8pcszdrxwp", []float64{44.4423955679, 44.4424009323, -177.550477982, -177.550467253}},
{"mckjc7q4r", []float64{-36.9397687912, -36.9397258759, 84.4384717941, 84.4385147095}},
{"p6sngm70", []float64{-74.7221374512, -74.7219657898, 152.021942139, 152.022285461}},
{"59gcptbk", []float64{-79.9481964111, -79.9480247498, -16.8966293335, -16.8962860107}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"fmnv3ug5m", []float64{74.0745019913, 74.0745449066, -69.1765737534, -69.176530838}},
{"qc8kkbc0xev", []float64{-35.8112038672, -35.8112025261, 124.312004596, 124.312005937}},
{"b4", []float64{56.25, 61.875, -180.0, -168.75}},
{"vkxeh", []float64{70.83984375, 70.8837890625, 66.97265625, 67.0166015625}},
{"399qcn2zv", []float64{-35.3403139114, -35.3402709961, -110.696997643, -110.696954727}},
{"ybz3", []float64{49.39453125, 49.5703125, 133.9453125, 134.296875}},
{"d5e25", []float64{19.6875, 19.7314453125, -85.2978515625, -85.25390625}},
{"5n", []float64{-56.25, -50.625, -45.0, -33.75}},
{"wdw", []float64{14.0625, 15.46875, 120.9375, 122.34375}},
{"29q7", []float64{-37.44140625, -37.265625, -148.7109375, -148.359375}},
{"tqe1y4trw", []float64{36.885137558, 36.8851804733, 60.7398891449, 60.7399320602}},
{"zfwy172d", []float64{60.135383606, 60.1355552673, 178.297805786, 178.298149109}},
{"tfd57f", []float64{14.6447753906, 14.6502685547, 81.7272949219, 81.73828125}},
{"27f6s6h", []float64{-23.4558105469, -23.4544372559, -165.393676758, -165.392303467}},
{"zk2q2ph7db", []float64{70.0439357758, 70.0439411402, 146.607517004, 146.607527733}},
{"ptp", []float64{-61.875, -60.46875, 167.34375, 168.75}},
{"7pcgp042wz", []float64{-0.878782868385, -0.878777503967, -42.2280657291, -42.2280550003}},
{"9t", []float64{28.125, 33.75, -112.5, -101.25}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"qd0pwby4y", []float64{-32.4270486832, -32.4270057678, 112.805128098, 112.805171013}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"yet", []float64{64.6875, 66.09375, 119.53125, 120.9375}},
{"v4pbsh7cp", []float64{56.3614082336, 56.361451149, 56.0796689987, 56.0797119141}},
{"kvr7u7pm7jeu", []float64{-14.7921594232, -14.7921592556, 44.1421702132, 44.1421705484}},
{"687jj", []float64{-42.71484375, -42.6708984375, -63.0615234375, -63.017578125}},
{"4w29", []float64{-54.66796875, -54.4921875, -66.796875, -66.4453125}},
{"6bz45dve", []float64{-40.4140663147, -40.4138946533, -46.2448883057, -46.2445449829}},
{"gfysykk0nw29", []float64{61.32709058, 61.3270907477, -1.82894401252, -1.82894367725}},
{"pdw4scw", []float64{-75.4898071289, -75.4884338379, 166.15447998, 166.155853271}},
{"65f4", []float64{-23.5546875, -23.37890625, -87.1875, -86.8359375}},
{"jc9g6tpd08j", []float64{-80.9634017944, -80.9634004533, 81.3311286271, 81.3311299682}},
{"ckw1tvf18r09", []float64{70.608052779, 70.6080529466, -115.057056472, -115.057056136}},
{"2cbtp", []float64{-34.27734375, -34.2333984375, -145.239257812, -145.1953125}},
{"54myf", []float64{-76.1572265625, -76.11328125, -36.826171875, -36.7822265625}},
{"kw", []float64{-11.25, -5.625, 22.5, 33.75}},
{"mw", []float64{-11.25, -5.625, 67.5, 78.75}},
{"2bjvee8sgek", []float64{-44.0131442249, -44.0131428838, -138.009411693, -138.009410352}},
{"yj6r4eyxuv1", []float64{75.783675313, 75.7836766541, 93.2830573618, 93.2830587029}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"pkec4ng", []float64{-64.4746398926, -64.4732666016, 151.615447998, 151.616821289}},
{"uvpynzd0v", []float64{74.2210149765, 74.2210578918, 44.9480295181, 44.9480724335}},
{"grxrc9by8g4", []float64{88.5605496168, 88.5605509579, -23.4877046943, -23.4877033532}},
{"6z0", []float64{-5.625, -4.21875, -56.25, -54.84375}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"6npe6", []float64{-10.6787109375, -10.634765625, -79.365234375, -79.3212890625}},
{"04wzmgz23", []float64{-74.6424436569, -74.6424007416, -170.245127678, -170.245084763}},
{"dmfzk9", []float64{33.6236572266, 33.6291503906, -74.6850585938, -74.6740722656}},
{"eeu", []float64{21.09375, 22.5, -16.875, -15.46875}},
{"84bd9k3t", []float64{15.9324073792, 15.9325790405, -179.239883423, -179.2395401}},
{"7q6ywwxq2kc", []float64{-8.664367944, -8.6643666029, -29.5871995389, -29.5871981978}},
{"bve6jyuwk", []float64{76.327214241, 76.3272571564, -141.420650482, -141.420607567}},
{"h558", []float64{-73.125, -72.94921875, 4.921875, 5.2734375}},
{"sk4f3d2h1vq", []float64{22.9085822403, 22.9085835814, 15.1831886172, 15.1831899583}},
{"tffnvn", []float64{16.6882324219, 16.6937255859, 81.7822265625, 81.7932128906}},
{"eppvmm886m", []float64{40.3281337023, 40.3281390667, -33.8700664043, -33.8700556755}},
{"d2w", []float64{2.8125, 4.21875, -70.3125, -68.90625}},
{"sd3", []float64{12.65625, 14.0625, 23.90625, 25.3125}},
{"q73kk", []float64{-25.9716796875, -25.927734375, 103.18359375, 103.227539062}},
{"wtz5yx", []float64{33.0413818359, 33.046875, 122.629394531, 122.640380859}},
{"xw8z", []float64{37.79296875, 37.96875, 158.5546875, 158.90625}},
{"dyhuchkt7qr", []float64{34.6092416346, 34.6092429757, -49.5200385153, -49.5200371742}},
{"pyb1es47f", []float64{-51.7449617386, -51.7449188232, 168.906984329, 168.907027245}},
{"v6jcgfj1nus", []float64{56.5687993169, 56.568800658, 64.5078939199, 64.507895261}},
{"xxe", []float64{42.1875, 43.59375, 161.71875, 163.125}},
{"7xdvcext7rq", []float64{-1.78159162402, -1.78159028292, -18.5564473271, -18.556445986}},
{"1f35b7w", []float64{-76.6653442383, -76.6639709473, -99.8245239258, -99.8231506348}},
{"he675b8pjek", []float64{-71.187440604, -71.1874392629, 25.8290988207, 25.8291001618}},
{"pzj3", []float64{-50.44921875, -50.2734375, 176.1328125, 176.484375}},
{"1s1m8j10zwq3", []float64{-66.5055748634, -66.5055746958, -110.740483962, -110.740483627}},
{"sk4xsg4ufxm", []float64{23.8356931508, 23.8356944919, 14.9782557786, 14.9782571197}},
{"r3m2ker83", []float64{-37.906908989, -37.9068660736, 153.840909004, 153.84095192}},
{"p7bj", []float64{-68.02734375, -67.8515625, 146.25, 146.6015625}},
{"4wws597u", []float64{-52.7268218994, -52.726650238, -58.2004165649, -58.2000732422}},
{"u9rk", []float64{52.734375, 52.91015625, 32.6953125, 33.046875}},
{"2mv856bp9uj4", []float64{-12.6398345456, -12.6398343779, -160.872720927, -160.872720592}},
{"57th4543xnbs", []float64{-69.5926011354, -69.5926009677, -26.6274683923, -26.627468057}},
{"8pct", []float64{44.47265625, 44.6484375, -177.890625, -177.5390625}},
{"me0skjqbk8", []float64{-27.3490476608, -27.3490422964, 68.3883690834, 68.3883798122}},
{"30s74", []float64{-41.66015625, -41.6162109375, -128.935546875, -128.891601562}},
{"9r0h9fedn", []float64{40.1800918579, 40.1801347733, -123.668031693, -123.667988777}},
{"9v6gvte7r", []float64{30.2211999893, 30.2212429047, -97.136349678, -97.1363067627}},
{"j5d", []float64{-70.3125, -68.90625, 47.8125, 49.21875}},
{"hf8ge", []float64{-75.322265625, -75.2783203125, 34.9365234375, 34.98046875}},
{"hf2hmz", []float64{-76.5582275391, -76.552734375, 34.0026855469, 34.013671875}},
{"5wc405", []float64{-51.6632080078, -51.6577148438, -21.09375, -21.0827636719}},
{"x2dk49y7p8", []float64{3.52575302124, 3.52575838566, 149.532830715, 149.532841444}},
{"350jjfux7dbk", []float64{-27.2297275811, -27.2297274135, -134.740984105, -134.740983769}},
{"04wd", []float64{-75.5859375, -75.41015625, -170.859375, -170.5078125}},
{"1zxmhstk", []float64{-46.9081878662, -46.9080162048, -90.8497238159, -90.8493804932}},
{"b2sj26ddce3", []float64{48.7495739758, 48.7495753169, -163.11051473, -163.110513389}},
{"vznd8mz363", []float64{84.8462587595, 84.8462641239, 87.9116642475, 87.9116749763}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"kw", []float64{-11.25, -5.625, 22.5, 33.75}},
{"1s0nq579", []float64{-66.3833427429, -66.3831710815, -112.231521606, -112.231178284}},
{"mzky2scd", []float64{-3.09368133545, -3.09350967407, 85.4537200928, 85.4540634155}},
{"kctpzwx2rxg", []float64{-35.1644052565, -35.1644039154, 41.121122092, 41.1211234331}},
{"19", []float64{-84.375, -78.75, -112.5, -101.25}},
{"pmg4wc8pn", []float64{-57.2073554993, -57.2073125839, 150.765638351, 150.765681267}},
{"sxcn0yd0jck", []float64{44.6841497719, 44.684151113, 23.9422076941, 23.9422090352}},
{"000dsj4g", []float64{-89.5325660706, -89.5323944092, -179.1173172, -179.116973877}},
{"pgv0", []float64{-68.90625, -68.73046875, 175.78125, 176.1328125}},
{"dhw0935d8", []float64{25.4063129425, 25.4063558578, -81.5027618408, -81.5027189255}},
{"4gvz08kbk9", []float64{-67.6743596792, -67.6743543148, -48.1353735924, -48.1353628635}},
{"djz4g1m", []float64{32.8340148926, 32.8353881836, -80.0175476074, -80.0161743164}},
{"x4yhn1h5m1z", []float64{16.1779354513, 16.1779367924, 143.706889004, 143.706890345}},
{"9t", []float64{28.125, 33.75, -112.5, -101.25}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"4fgytpvp2v", []float64{-73.3448284864, -73.344823122, -50.7499372959, -50.7499265671}},
{"jggxx", []float64{-67.587890625, -67.5439453125, 83.9794921875, 84.0234375}},
{"mye8t1", []float64{-8.34411621094, -8.33862304688, 83.8916015625, 83.9025878906}},
{"bssun7dstj", []float64{71.0356503725, 71.0356557369, -150.542006493, -150.541995764}},
{"ehhv", []float64{23.37890625, 23.5546875, -38.3203125, -37.96875}},
{"484tc", []float64{-88.9892578125, -88.9453125, -63.9404296875, -63.896484375}},
{"d4tjtv", []float64{15.0567626953, 15.0622558594, -82.7160644531, -82.705078125}},
{"2z", []float64{-5.625, 0.0, -146.25, -135.0}},
{"t5ey32", []float64{20.7861328125, 20.7916259766, 50.3283691406, 50.3393554688}},
{"um", []float64{73.125, 78.75, 11.25, 22.5}},
{"pe", []float64{-73.125, -67.5, 157.5, 168.75}},
{"2x05zw6z", []float64{-4.93028640747, -4.93011474609, -157.166633606, -157.166290283}},
{"67", []float64{-28.125, -22.5, -78.75, -67.5}},
{"dxfr3yndexbg", []float64{44.9015942775, 44.9015944451, -64.249955602, -64.2499552667}},
{"y87", []float64{46.40625, 47.8125, 116.71875, 118.125}},
{"2h29w", []float64{-20.830078125, -20.7861328125, -179.033203125, -178.989257812}},
{"cv", []float64{73.125, 78.75, -101.25, -90.0}},
{"jcx2m1mjy9e", []float64{-81.5106931329, -81.5106917918, 89.1721884906, 89.1721898317}},
{"ryj9w", []float64{-10.986328125, -10.9423828125, 176.748046875, 176.791992188}},
{"g5sftxrhf40", []float64{65.1676046848, 65.1676060259, -38.0689144135, -38.0689130723}},
{"nhs95z98z", []float64{-64.4703912735, -64.4703483582, 96.4952802658, 96.4953231812}},
{"s7n00se8u3n", []float64{16.8998533487, 16.8998546898, 19.7144696116, 19.7144709527}},
{"4310r6m", []float64{-84.3186950684, -84.3173217773, -77.0182800293, -77.0169067383}},
{"7kkmp", []float64{-20.21484375, -20.1708984375, -27.4658203125, -27.421875}},
{"3dvnpf13665", []float64{-28.4653508663, -28.4653495252, -105.126356632, -105.12635529}},
{"1cv2hegqd1s7", []float64{-80.1345262863, -80.1345261186, -93.6648788676, -93.6648785323}},
{"uy0yttxwk65", []float64{79.9238741398, 79.9238754809, 35.0568728149, 35.056874156}},
{"166cs2etxffy", []float64{-77.0763716474, -77.0763714798, -119.690902121, -119.690901786}},
{"bm7n7", []float64{75.6298828125, 75.673828125, -164.399414062, -164.35546875}},
{"5r7q", []float64{-48.1640625, -47.98828125, -29.1796875, -28.828125}},
{"ymjqjv6", []float64{74.2085266113, 74.2098999023, 108.888244629, 108.88961792}},
{"jx95kpvmv9", []float64{-47.1976464987, -47.1976411343, 69.0894770622, 69.0894877911}},
{"f1m4m9", []float64{52.4322509766, 52.4377441406, -82.7270507812, -82.7160644531}},
{"0cdr1wfep", []float64{-80.2944374084, -80.2943944931, -143.016285896, -143.016242981}},
{"fe4q2v7", []float64{63.0024719238, 63.0038452148, -64.2988586426, -64.2974853516}},
{"9pxfyb9p", []float64{42.6748466492, 42.6750183105, -123.80355835, -123.803215027}},
{"8w088r", []float64{33.8763427734, 33.8818359375, -156.785888672, -156.774902344}},
{"u569fj", []float64{63.6163330078, 63.6218261719, 3.603515625, 3.61450195312}},
{"smvm0syj0kst", []float64{33.2496320643, 33.2496322319, 18.6630416662, 18.6630420014}},
{"vx", []float64{84.375, 90.0, 67.5, 78.75}},
{"zj", []float64{73.125, 78.75, 135.0, 146.25}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"69n85w0", []float64{-39.3420410156, -39.3406677246, -58.2055664062, -58.2041931152}},
{"ywmj", []float64{81.03515625, 81.2109375, 119.53125, 119.8828125}},
{"2717c52t", []float64{-27.4471092224, -27.446937561, -166.947555542, -166.947212219}},
{"t1h", []float64{5.625, 7.03125, 50.625, 52.03125}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"5xphd2", []float64{-49.833984375, -49.8284912109, -12.5573730469, -12.5463867188}},
{"xy8", []float64{36.5625, 37.96875, 168.75, 170.15625}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"3pet007v7qb", []float64{-1.93128302693, -1.93128168583, -130.072835684, -130.072834343}},
{"dyuz", []float64{39.19921875, 39.375, -49.5703125, -49.21875}},
{"6r", []float64{-5.625, 0.0, -78.75, -67.5}},
{"y8v06s5", []float64{49.2846679688, 49.2860412598, 119.645233154, 119.646606445}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"mfuy2m", []float64{-28.4051513672, -28.3996582031, 85.4406738281, 85.4516601562}},
{"dnp2u", []float64{33.8818359375, 33.92578125, -79.62890625, -79.5849609375}},
{"g4u3ehx6", []float64{60.757484436, 60.7576560974, -38.8816452026, -38.8813018799}},
{"q14h9tg88tx", []float64{-38.5522833467, -38.5522820055, 92.8832553327, 92.8832566738}},
{"d4m6rn", []float64{13.0847167969, 13.0902099609, -82.3095703125, -82.2985839844}},
{"64", []float64{-33.75, -28.125, -90.0, -78.75}},
{"y2nzb", []float64{46.3623046875, 46.40625, 110.7421875, 110.786132812}},
{"yhs5m8ytcgxb", []float64{70.8889147639, 70.8889149316, 95.8757111058, 95.875711441}},
{"ytp9j8f0dv8", []float64{73.305016458, 73.3050177991, 123.291438818, 123.291440159}},
{"pxcpm2h", []float64{-45.1318359375, -45.1304626465, 159.142456055, 159.143829346}},
{"5zyf9", []float64{-45.966796875, -45.9228515625, -1.7138671875, -1.669921875}},
{"wmz7v2", []float64{33.0029296875, 33.0084228516, 111.676025391, 111.687011719}},
{"3fb7hkc8wus", []float64{-28.9777037501, -28.977702409, -100.709314942, -100.709313601}},
{"ssstmsrv", []float64{26.2595558167, 26.259727478, 29.0804672241, 29.0808105469}},
{"21", []float64{-39.375, -33.75, -180.0, -168.75}},
{"w0du7r8257", []float64{3.60078513622, 3.60079050064, 94.0104925632, 94.0105032921}},
{"fhx", []float64{70.3125, 71.71875, -80.15625, -78.75}},
{"1xd", []float64{-47.8125, -46.40625, -109.6875, -108.28125}},
{"s040p9v3shb2", []float64{0.00989601016045, 0.00989617779851, 3.14947161824, 3.14947195351}},
{"gq", []float64{78.75, 84.375, -33.75, -22.5}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"17b4wsgdq0q", []float64{-68.4403167665, -68.4403154254, -123.459283412, -123.45928207}},
{"x3qs", []float64{7.734375, 7.91015625, 155.390625, 155.7421875}},
{"rc", []float64{-39.375, -33.75, 168.75, 180.0}},
{"sfxjv06b9", []float64{15.0747013092, 15.0747442245, 43.8172960281, 43.8173389435}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"ngveyg", []float64{-68.2305908203, -68.2250976562, 131.781005859, 131.791992188}},
{"pmxd5bd39qp", []float64{-58.7079012394, -58.7078998983, 156.964822859, 156.9648242}},
{"xhw6", []float64{25.6640625, 25.83984375, 143.7890625, 144.140625}},
{"bc6vx6", []float64{53.0090332031, 53.0145263672, -142.064208984, -142.053222656}},
{"uuxy5sbu", []float64{71.3939666748, 71.3941383362, 44.803276062, 44.8036193848}},
{"yu", []float64{67.5, 73.125, 123.75, 135.0}},
{"610pf9c3ebh", []float64{-38.0028247833, -38.0028234422, -89.888253808, -89.8882524669}},
{"9d", []float64{11.25, 16.875, -112.5, -101.25}},
{"1s0tk4yp", []float64{-66.5608406067, -66.5606689453, -111.612854004, -111.612510681}},
{"ss7yjqhs5su", []float64{24.9946086109, 24.994609952, 28.0104857683, 28.0104871094}},
{"krqww1d0kp", []float64{-3.06785166264, -3.06784629822, 20.6572151184, 20.6572258472}},
{"4yzg18c07qnm", []float64{-51.4997104369, -51.4997102693, -45.2841233835, -45.2841230482}},
{"5wf241", []float64{-52.0257568359, -52.0202636719, -19.248046875, -19.2370605469}},
{"gxrh60ce", []float64{86.5329551697, 86.5331268311, -12.5662994385, -12.5659561157}},
{"5rzrgr9qf", []float64{-45.0015878677, -45.0015449524, -23.4100627899, -23.4100198746}},
{"u3", []float64{50.625, 56.25, 11.25, 22.5}},
{"8tm4uz7c", []float64{30.0546455383, 30.0548171997, -150.254859924, -150.254516602}},
{"wz5f3", []float64{39.7705078125, 39.814453125, 129.067382812, 129.111328125}},
{"ckz", []float64{71.71875, 73.125, -113.90625, -112.5}},
{"h4s31", []float64{-75.76171875, -75.7177734375, 6.0205078125, 6.064453125}},
{"hguje", []float64{-67.939453125, -67.8955078125, 39.5068359375, 39.55078125}},
{"4rnfer", []float64{-50.1470947266, -50.1416015625, -69.1149902344, -69.1040039062}},
{"gj", []float64{73.125, 78.75, -45.0, -33.75}},
{"04", []float64{-78.75, -73.125, -180.0, -168.75}},
{"kvj", []float64{-16.875, -15.46875, 40.78125, 42.1875}},
{"7c3p", []float64{-36.73828125, -36.5625, -9.84375, -9.4921875}},
{"rdw55", []float64{-30.41015625, -30.3662109375, 166.069335938, 166.11328125}},
{"7spe18wk094b", []float64{-21.969217658, -21.9692174904, -11.8785988167, -11.8785984814}},
{"uxumm", []float64{89.5166015625, 89.560546875, 28.6962890625, 28.740234375}},
{"1n0sh0", []float64{-55.546875, -55.5413818359, -134.12109375, -134.110107422}},
{"cphmy", []float64{85.3857421875, 85.4296875, -128.759765625, -128.715820312}},
{"sd", []float64{11.25, 16.875, 22.5, 33.75}},
{"h6jbb2gxyd0", []float64{-78.6127030849, -78.6127017438, 19.3520092964, 19.3520106375}},
{"x3", []float64{5.625, 11.25, 146.25, 157.5}},
{"yv289c0nbs", []float64{74.625813961, 74.6258193254, 124.530050755, 124.530061483}},
{"g1fxbp5", []float64{56.2445068359, 56.245880127, -41.480255127, -41.4788818359}},
{"bqdh", []float64{82.265625, 82.44140625, -165.9375, -165.5859375}},
{"w558neznn3", []float64{16.8966346979, 16.8966400623, 95.2174007893, 95.2174115181}},
{"up", []float64{84.375, 90.0, 0.0, 11.25}},
{"pmy73pm2r", []float64{-57.0450925827, -57.0450496674, 155.090517998, 155.090560913}},
{"jzerkufsjnh", []float64{-46.5112745762, -46.5112732351, 83.5327059031, 83.5327072442}},
{"8eks", []float64{18.984375, 19.16015625, -151.171875, -150.8203125}},
{"3rryyvfxc", []float64{-2.99931049347, -2.99926757812, -112.551455498, -112.551412582}},
{"vn0cz", []float64{79.0576171875, 79.1015625, 46.3623046875, 46.40625}},
{"skb5cnez", []float64{27.4148368835, 27.4150085449, 11.2990951538, 11.2994384766}},
{"3p5cu0yju", []float64{-5.31227588654, -5.31223297119, -129.542369843, -129.542326927}},
{"8yrwss1y2s", []float64{36.3218951225, 36.3219004869, -135.502946377, -135.502935648}},
{"7x9kys4ydp", []float64{-1.95441305637, -1.95440769196, -20.4526805878, -20.4526698589}},
{"u7gscks3", []float64{66.9536018372, 66.9537734985, 16.2326431274, 16.2329864502}},
{"30c8pz7", []float64{-40.7414245605, -40.7400512695, -132.545928955, -132.544555664}},
{"umertwj6wxuc", []float64{77.2892892547, 77.2892894223, 16.0695068166, 16.0695071518}},
{"n6", []float64{-78.75, -73.125, 101.25, 112.5}},
{"br8dqrhg058f", []float64{87.6219940558, 87.6219942234, -167.765692659, -167.765692323}},
{"tp", []float64{39.375, 45.0, 45.0, 56.25}},
{"33uy", []float64{-34.1015625, -33.92578125, -117.0703125, -116.71875}},
{"u3ps1jnxg", []float64{51.356921196, 51.3569641113, 21.8498754501, 21.8499183655}},
{"nqwmqymbyz", []float64{-52.4801498652, -52.4801445007, 110.343879461, 110.34389019}},
{"wfugcgd", []float64{16.1471557617, 16.1485290527, 130.509338379, 130.51071167}},
{"vp7g2eg", []float64{86.3731384277, 86.3745117188, 50.2995300293, 50.3009033203}},
{"zz", []float64{84.375, 90.0, 168.75, 180.0}},
{"859t0xmm6gwk", []float64{20.6071523577, 20.6071525253, -177.861316167, -177.861315832}},
{"bvjnf49wp1hu", []float64{74.3262923509, 74.3262925185, -139.128492661, -139.128492326}},
{"08ybs", []float64{-85.693359375, -85.6494140625, -147.83203125, -147.788085938}},
{"dr908p7", []float64{42.3152160645, 42.3165893555, -77.339630127, -77.3382568359}},
{"gufpbsu", []float64{73.1071472168, 73.1085205078, -8.41003417969, -8.40866088867}},
{"623c388eg2t", []float64{-43.3706304431, -43.370629102, -76.2223117054, -76.2223103642}},
{"8rc", []float64{43.59375, 45.0, -167.34375, -165.9375}},
{"h4", []float64{-78.75, -73.125, 0.0, 11.25}},
{"x84", []float64{0.0, 1.40625, 160.3125, 161.71875}},
{"bbxgcx6nyzk", []float64{48.5127027333, 48.5127040744, -135.282602906, -135.282601565}},
{"tqg06239nrht", []float64{38.014278654, 38.0142788216, 60.5699611455, 60.5699614808}},
{"05fd", []float64{-68.5546875, -68.37890625, -176.484375, -176.1328125}},
{"wuc", []float64{26.71875, 28.125, 125.15625, 126.5625}},
{"m3hh", []float64{-38.671875, -38.49609375, 61.875, 62.2265625}},
{"m2w4ru", []float64{-41.7700195312, -41.7645263672, 65.0280761719, 65.0390625}},
{"4cz3k", []float64{-79.9365234375, -79.892578125, -45.87890625, -45.8349609375}},
{"jqefz9v", []float64{-52.9444885254, -52.9431152344, 61.8598937988, 61.8612670898}},
{"qqcnf60wjf", []float64{-5.83269953728, -5.83269417286, 102.756060362, 102.756071091}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"n8qen549x9vr", []float64{-88.0496587045, -88.0496585369, 121.908059008, 121.908059344}},
{"g01nwm2wyj", []float64{46.1726027727, 46.1726081371, -43.3181476593, -43.3181369305}},
{"6xc", []float64{-1.40625, 0.0, -66.09375, -64.6875}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"nmjjnhz67", []float64{-60.9696149826, -60.9695720673, 108.555006981, 108.555049896}},
{"2gzwv1zggqg", []float64{-22.7094335854, -22.7094322443, -135.472611934, -135.472610593}},
{"dt", []float64{28.125, 33.75, -67.5, -56.25}},
{"v33ysf2y0s", []float64{53.1872391701, 53.1872445345, 58.9207291603, 58.9207398891}},
{"fwy6tccjp3", []float64{83.4186798334, 83.4186851978, -58.4565675259, -58.456556797}},
{"qug6fjx4ue8k", []float64{-17.7671476454, -17.7671474777, 128.418009616, 128.418009952}},
{"8y59duq21", []float64{34.0370178223, 34.0370607376, -141.198649406, -141.198606491}},
{"pxgfrpkhdnk", []float64{-45.9701107442, -45.9701094031, 163.086639047, 163.086640388}},
{"91b", []float64{9.84375, 11.25, -135.0, -133.59375}},
{"v83mnvugmh", []float64{47.3173213005, 47.3173266649, 69.5611810684, 69.5611917973}},
{"k76cem9", []float64{-26.4248657227, -26.4234924316, 15.2613830566, 15.2627563477}},
{"ydjq0mk", []float64{57.3335266113, 57.3348999023, 119.899291992, 119.900665283}},
{"cdsuf", []float64{59.8974609375, 59.94140625, -105.732421875, -105.688476562}},
{"tuyz3", []float64{27.9931640625, 28.037109375, 88.2861328125, 88.330078125}},
{"r7r2skwfj00", []float64{-26.605796814, -26.6057954729, 156.641564369, 156.64156571}},
{"8hx507p9f5x", []float64{25.8566424251, 25.8566437662, -170.134868771, -170.13486743}},
{"cc", []float64{50.625, 56.25, -101.25, -90.0}},
{"pkuuvxz1z3", []float64{-62.4034112692, -62.4034059048, 153.181310892, 153.181321621}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"7vvz9gbf", []float64{-11.316947937, -11.3167762756, -3.08612823486, -3.08578491211}},
{"54r", []float64{-77.34375, -75.9375, -35.15625, -33.75}},
{"5r0mm0pwj1j", []float64{-49.7011131048, -49.7011117637, -33.1681899726, -33.1681886315}},
{"mx", []float64{-5.625, 0.0, 67.5, 78.75}},
{"rm39jwc", []float64{-15.2558898926, -15.2545166016, 148.60244751, 148.603820801}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"4gqrkyvqc", []float64{-70.4060983658, -70.4060554504, -47.2449445724, -47.2449016571}},
{"9s36btfr9", []float64{24.4225215912, 24.4225645065, -110.717082024, -110.717039108}},
{"dxt5", []float64{42.71484375, 42.890625, -60.46875, -60.1171875}},
{"21x8tven9", []float64{-36.4432811737, -36.4432382584, -169.196276665, -169.196233749}},
{"7hh", []float64{-22.5, -21.09375, -39.375, -37.96875}},
{"v7nm8q7s", []float64{62.8768157959, 62.8769874573, 65.0548553467, 65.0551986694}},
{"e9qxh", []float64{8.26171875, 8.3056640625, -13.18359375, -13.1396484375}},
{"273", []float64{-26.71875, -25.3125, -167.34375, -165.9375}},
{"p99qpkcvb", []float64{-80.4807329178, -80.4806900024, 159.578819275, 159.57886219}},
{"q2062f9csybw", []float64{-44.5904645696, -44.590464402, 101.637129262, 101.637129597}},
{"btsq32", []float64{77.0361328125, 77.0416259766, -151.468505859, -151.457519531}},
{"1cj5", []float64{-83.84765625, -83.671875, -94.21875, -93.8671875}},
{"j71pemzwqxt9", []float64{-71.7739416473, -71.7739414796, 57.8096582741, 57.8096586093}},
{"qnqcbndu0nf", []float64{-9.49970439076, -9.49970304966, 99.4959667325, 99.4959680736}},
{"2v7cm6junqb", []float64{-15.237314254, -15.2373129129, -140.737684965, -140.737683624}},
{"e7", []float64{16.875, 22.5, -33.75, -22.5}},
{"s91p45", []float64{6.87194824219, 6.87744140625, 23.994140625, 24.0051269531}},
{"xwk1de36", []float64{35.438117981, 35.4382896423, 163.236579895, 163.236923218}},
{"gf2", []float64{57.65625, 59.0625, -11.25, -9.84375}},
{"d1m", []float64{7.03125, 8.4375, -82.96875, -81.5625}},
{"0b", []float64{-90.0, -84.375, -146.25, -135.0}},
{"rw4", []float64{-11.25, -9.84375, 160.3125, 161.71875}},
{"74pk9xsb", []float64{-32.9177856445, -32.9176139832, -34.7322463989, -34.7319030762}},
{"4ghe0cn8s", []float64{-72.5920772552, -72.5920343399, -49.8798179626, -49.8797750473}},
{"tw4", []float64{33.75, 35.15625, 70.3125, 71.71875}},
{"gevx3", []float64{67.3681640625, 67.412109375, -14.7216796875, -14.677734375}},
{"kw8w6yqc1ks", []float64{-7.30433911085, -7.30433776975, 23.3333033323, 23.3333046734}},
{"vbdjwzpg7", []float64{48.8183069229, 48.8183498383, 81.8699026108, 81.8699455261}},
{"tp", []float64{39.375, 45.0, 45.0, 56.25}},
{"751q", []float64{-27.0703125, -26.89453125, -43.2421875, -42.890625}},
{"yeurzu", []float64{67.4780273438, 67.4835205078, 118.817138672, 118.828125}},
{"svx", []float64{30.9375, 32.34375, 43.59375, 45.0}},
{"4yrg1w", []float64{-54.2834472656, -54.2779541016, -45.2856445312, -45.2746582031}},
{"h6xmxenmzkc", []float64{-74.9532110989, -74.9532097578, 21.7837978899, 21.7837992311}},
{"j5uzmqughwj1", []float64{-67.5942097418, -67.5942095742, 51.9171233475, 51.9171236828}},
{"26trngxu", []float64{-29.6871185303, -29.6869468689, -161.059913635, -161.059570312}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"h58jxs7g", []float64{-69.3218421936, -69.3216705322, 0.334739685059, 0.335083007812}},
{"tg2kgbk3", []float64{19.1177558899, 19.1179275513, 79.2721939087, 79.2725372314}},
{"x34hfu", []float64{6.48193359375, 6.48742675781, 149.183349609, 149.194335938}},
{"774j", []float64{-27.24609375, -27.0703125, -30.9375, -30.5859375}},
{"yf", []float64{56.25, 61.875, 123.75, 135.0}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"e273k4gje9s", []float64{1.64203494787, 1.64203628898, -28.9996308088, -28.9996294677}},
{"e", []float64{0.0, 45.0, -45.0, 0.0}},
{"hx", []float64{-50.625, -45.0, 22.5, 33.75}},
{"wz", []float64{39.375, 45.0, 123.75, 135.0}},
{"w7p94b", []float64{17.05078125, 17.0562744141, 111.917724609, 111.928710938}},
{"69sgzyb46pbs", []float64{-35.8658129722, -35.8658128045, -60.4796498269, -60.4796494916}},
{"7f7m8k2u5", []float64{-31.3529205322, -31.3528776169, -6.66754245758, -6.66749954224}},
{"gq4n7m", []float64{79.8760986328, 79.8815917969, -30.7946777344, -30.7836914062}},
{"c1srg1pz8kt", []float64{54.8066094518, 54.8066107929, -128.880941123, -128.880939782}},
{"b2h1dy", []float64{45.2966308594, 45.3021240234, -163.004150391, -162.993164062}},
{"7170zy", []float64{-37.8039550781, -37.7984619141, -40.4406738281, -40.4296875}},
{"p09", []float64{-87.1875, -85.78125, 136.40625, 137.8125}},
{"sw970ux6", []float64{37.114906311, 37.1150779724, 24.3007278442, 24.301071167}},
{"rc8hsdptqn", []float64{-35.7595646381, -35.7595592737, 168.958311081, 168.95832181}},
{"qp3jujqc1", []float64{-3.17899703979, -3.17895412445, 91.5913438797, 91.591386795}},
{"dn7n9krj2tgg", []float64{36.3231066428, 36.3231068105, -85.7166788355, -85.7166785002}},
{"23e7vwt", []float64{-35.8676147461, -35.8662414551, -163.931121826, -163.929748535}},
{"um", []float64{73.125, 78.75, 11.25, 22.5}},
{"p8ws1", []float64{-86.484375, -86.4404296875, 166.684570312, 166.728515625}},
{"vqt63tm6xx", []float64{81.9873136282, 81.9873189926, 63.7062621117, 63.7062728405}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"bm20d", []float64{74.619140625, 74.6630859375, -168.662109375, -168.618164062}},
{"p3t4k9c0", []float64{-81.1573791504, -81.157207489, 153.480377197, 153.48072052}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"8rsnr3k0", []float64{43.2929992676, 43.293170929, -162.80090332, -162.800559998}},
{"b739vykqw", []float64{63.6243152618, 63.6243581772, -166.381845474, -166.381802559}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"13zrrq", []float64{-78.8488769531, -78.8433837891, -113.236083984, -113.225097656}},
{"6yk9jt", []float64{-9.64050292969, -9.63500976562, -49.6801757812, -49.6691894531}},
{"zmn0", []float64{73.125, 73.30078125, 154.6875, 155.0390625}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"ps151cxkk8", []float64{-66.9636869431, -66.9636815786, 158.993303776, 158.993314505}},
{"tn", []float64{33.75, 39.375, 45.0, 56.25}},
{"u7", []float64{61.875, 67.5, 11.25, 22.5}},
{"55yuz", []float64{-68.0712890625, -68.02734375, -35.2001953125, -35.15625}},
{"x0p3grv8k", []float64{0.350232124329, 0.350275039673, 145.345859528, 145.345902443}},
{"y5c6gjhshg", []float64{66.6053169966, 66.605322361, 91.896032095, 91.8960428238}},
{"6fd2j", []float64{-30.9375, -30.8935546875, -52.8662109375, -52.822265625}},
{"gegbfkt19cj6", []float64{66.2505683675, 66.2505685352, -17.1207369491, -17.1207366139}},
{"k7", []float64{-28.125, -22.5, 11.25, 22.5}},
{"y1ydbr", []float64{55.3656005859, 55.37109375, 99.1516113281, 99.1625976562}},
{"byqsd", []float64{80.947265625, 80.9912109375, -137.021484375, -136.977539062}},
{"mfcbxhf", []float64{-29.4172668457, -29.4158935547, 81.5213012695, 81.5226745605}},
{"yxwd5901", []float64{87.5447273254, 87.5448989868, 121.794433594, 121.794776917}},
{"24k7s7y0gqgj", []float64{-31.7077504657, -31.7077502981, -173.828286678, -173.828286342}},
{"9031fbu", []float64{1.71798706055, 1.71936035156, -133.467407227, -133.466033936}},
{"xqusuduvmc", []float64{38.8197237253, 38.8197290897, 152.782648802, 152.782659531}},
{"5bxw", []float64{-86.1328125, -85.95703125, -0.703125, -0.3515625}},
{"xw593h52f", []float64{33.9918279648, 33.9918708801, 162.470369339, 162.470412254}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"9xxm2s", []float64{43.1323242188, 43.1378173828, -102.282714844, -102.271728516}},
{"byf7t", []float64{83.583984375, 83.6279296875, -142.866210938, -142.822265625}},
{"v6", []float64{56.25, 61.875, 56.25, 67.5}},
{"yh", []float64{67.5, 73.125, 90.0, 101.25}},
{"d6k43xp", []float64{13.0902099609, 13.091583252, -73.0494689941, -73.0480957031}},
{"k4m5h", []float64{-31.81640625, -31.7724609375, 7.20703125, 7.2509765625}},
{"r1t7", []float64{-36.03515625, -35.859375, 142.3828125, 142.734375}},
{"cs4kvrjdkm7", []float64{68.3738274872, 68.3738288283, -109.097485095, -109.097483754}},
{"unu", []float64{82.96875, 84.375, 5.625, 7.03125}},
{"59xp", []float64{-80.33203125, -80.15625, -12.65625, -12.3046875}},
{"542p4hbm", []float64{-76.0863304138, -76.0861587524, -44.9117660522, -44.9114227295}},
{"5j", []float64{-61.875, -56.25, -45.0, -33.75}},
{"v3", []float64{50.625, 56.25, 56.25, 67.5}},
{"mstr13c0", []float64{-18.4474182129, -18.4472465515, 74.9391174316, 74.9394607544}},
{"wcvtdg61swv8", []float64{10.8286933601, 10.8286935277, 131.608171687, 131.608172022}},
{"3cm40m0w91z3", []float64{-37.5885963254, -37.5885961577, -94.207024388, -94.2070240527}},
{"m9fup0", []float64{-34.453125, -34.4476318359, 71.6748046875, 71.6857910156}},
{"jx", []float64{-50.625, -45.0, 67.5, 78.75}},
{"7myut8cg", []float64{-11.8605995178, -11.8604278564, -24.013710022, -24.0133666992}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"5dgu11uuf12g", []float64{-73.8176893629, -73.8176891953, -17.1760072187, -17.1760068834}},
{"qp1ens1zqg", []float64{-5.07442295551, -5.07441759109, 92.3977124691, 92.3977231979}},
{"vusxu8ewwbrz", []float64{71.6786695831, 71.6786697507, 85.2809854969, 85.2809858322}},
{"8qjtgd1q", []float64{34.7727584839, 34.7729301453, -160.860099792, -160.85975647}},
{"60dppvw", []float64{-40.9268188477, -40.9254455566, -86.838684082, -86.837310791}},
{"tygxz83s2", []float64{39.3331575394, 39.3332004547, 84.0035247803, 84.0035676956}},
{"e0qwrc", []float64{2.51037597656, 2.51586914062, -35.5187988281, -35.5078125}},
{"5wyh2qbz", []float64{-51.2458992004, -51.2457275391, -14.0504837036, -14.0501403809}},
{"0zqs", []float64{-48.515625, -48.33984375, -137.109375, -136.7578125}},
{"n5ss", []float64{-69.609375, -69.43359375, 96.328125, 96.6796875}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"v9q84x9n8ktx", []float64{52.0735898428, 52.0735900104, 76.7518796772, 76.7518800125}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"2eh8wf4ktz", []float64{-28.0253130198, -28.0253076553, -150.871907473, -150.871896744}},
{"cr96dg281x", []float64{87.6448434591, 87.6448488235, -121.870586872, -121.870576143}},
{"56u", []float64{-74.53125, -73.125, -28.125, -26.71875}},
{"628vt", []float64{-41.220703125, -41.1767578125, -77.4755859375, -77.431640625}},
{"0heb", []float64{-64.6875, -64.51171875, -174.7265625, -174.375}},
{"8q5skg4mk", []float64{34.5144510269, 34.5144939423, -163.616123199, -163.616080284}},
{"7x0pw97495hw", []float64{-4.2993279174, -4.29932774976, -22.2101866454, -22.2101863101}},
{"1j4vjyz1dqx", []float64{-60.9587225318, -60.9587211907, -130.870407969, -130.870406628}},
{"u4q1szh", []float64{57.9583740234, 57.9597473145, 8.65173339844, 8.65310668945}},
{"3v3dbm2uv2", []float64{-14.9556970596, -14.9556916952, -99.1283833981, -99.1283726692}},
{"te3jtfwjnq", []float64{19.2626702785, 19.262675643, 69.1674435139, 69.1674542427}},
{"sgvgx3ey6qe", []float64{21.7183318734, 21.7183332145, 42.1597914398, 42.1597927809}},
{"2sw", []float64{-19.6875, -18.28125, -149.0625, -147.65625}},
{"wzy2pqu8e", []float64{43.6309146881, 43.6309576035, 132.863974571, 132.864017487}},
{"dk2849", []float64{23.9117431641, 23.9172363281, -77.9370117188, -77.9260253906}},
{"0d65", []float64{-76.81640625, -76.640625, -154.6875, -154.3359375}},
{"84", []float64{11.25, 16.875, -180.0, -168.75}},
{"q17", []float64{-37.96875, -36.5625, 94.21875, 95.625}},
{"x9wzer", []float64{9.79431152344, 9.7998046875, 167.135009766, 167.145996094}},
{"7xk2s7425n6", []float64{-4.1143463552, -4.1143450141, -16.3334485888, -16.3334472477}},
{"jv", []float64{-61.875, -56.25, 78.75, 90.0}},
{"u5juvw", []float64{62.7429199219, 62.7484130859, 8.32763671875, 8.33862304688}},
{"cnuczt0c", []float64{83.3040046692, 83.3041763306, -127.989692688, -127.989349365}},
{"7f3jv330zuh", []float64{-31.3259911537, -31.3259898126, -9.61132586002, -9.61132451892}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"70jqgb8uv", []float64{-43.8099145889, -43.8098716736, -37.4511480331, -37.4511051178}},
{"u5t5d155", []float64{65.3087425232, 65.3089141846, 7.12326049805, 7.1236038208}},
{"r9hy1gz8m4p", []float64{-38.2996594906, -38.2996581495, 164.267115444, 164.267116785}},
{"qwmurb9920", []float64{-9.09371852875, -9.09371316433, 120.928573608, 120.928584337}},
{"d2pt693fw4", []float64{0.930157899857, 0.930163264275, -68.0906009674, -68.0905902386}},
{"zfgbx", []float64{60.556640625, 60.6005859375, 174.331054688, 174.375}},
{"c7mtdqkfuuc", []float64{64.2828767002, 64.2828780413, -115.910019726, -115.910018384}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"w89eqd", []float64{3.39477539062, 3.40026855469, 114.895019531, 114.906005859}},
{"75mtem68vx", []float64{-25.7229477167, -25.7229423523, -37.1191334724, -37.1191227436}},
{"12e9fwr", []float64{-86.8455505371, -86.8441772461, -118.708648682, -118.707275391}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"nq", []float64{-56.25, -50.625, 101.25, 112.5}},
{"svnx", []float64{29.35546875, 29.53125, 42.890625, 43.2421875}},
{"0e1kd5pxdgt", []float64{-72.316198647, -72.3161973059, -155.64387247, -155.643871129}},
{"pgk", []float64{-71.71875, -70.3125, 174.375, 175.78125}},
{"vry9q", []float64{88.8134765625, 88.857421875, 65.654296875, 65.6982421875}},
{"6f2x91u", []float64{-31.0157775879, -31.0144042969, -55.4974365234, -55.4960632324}},
{"3hefw", []float64{-19.248046875, -19.2041015625, -129.462890625, -129.418945312}},
{"g", []float64{45.0, 90.0, -45.0, 0.0}},
{"m66erp", []float64{-31.7340087891, -31.728515625, 60.0732421875, 60.0842285156}},
{"5nwny", []float64{-52.2509765625, -52.20703125, -36.298828125, -36.2548828125}},
{"d5", []float64{16.875, 22.5, -90.0, -78.75}},
{"wuphy8", []float64{23.3349609375, 23.3404541016, 133.879394531, 133.890380859}},
{"b8u631kf", []float64{49.6214675903, 49.6216392517, -151.472969055, -151.472625732}},
{"34d8j", []float64{-30.9375, -30.8935546875, -131.264648438, -131.220703125}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"he", []float64{-73.125, -67.5, 22.5, 33.75}},
{"yec1yepjd", []float64{66.4187908173, 66.4188337326, 114.201593399, 114.201636314}},
{"h0p58hpz", []float64{-89.3615913391, -89.3614196777, 9.85439300537, 9.85473632812}},
{"8u", []float64{22.5, 28.125, -146.25, -135.0}},
{"hg9sxf0mu", []float64{-69.509510994, -69.5094680786, 36.200466156, 36.2005090714}},
{"7zdg4c9868", []float64{-2.27687358856, -2.27686822414, -7.25979566574, -7.2597849369}},
{"1h", []float64{-67.5, -61.875, -135.0, -123.75}},
{"k7h239zr4097", []float64{-28.0702368356, -28.070236668, 17.3025243357, 17.302524671}},
{"9h", []float64{22.5, 28.125, -135.0, -123.75}},
{"fx", []float64{84.375, 90.0, -67.5, -56.25}},
{"sf66h05", []float64{13.0078125, 13.009185791, 37.093963623, 37.0953369141}},
{"4ge39", []float64{-70.048828125, -70.0048828125, -51.6357421875, -51.591796875}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"ypd", []float64{87.1875, 88.59375, 92.8125, 94.21875}},
{"gbzh2r", []float64{50.0042724609, 50.009765625, -1.39526367188, -1.38427734375}},
{"h0", []float64{-90.0, -84.375, 0.0, 11.25}},
{"nk7wkxwvku2", []float64{-64.952994436, -64.9529930949, 106.379102468, 106.37910381}},
{"23qu", []float64{-37.265625, -37.08984375, -159.2578125, -158.90625}},
{"n15b3", []float64{-84.3310546875, -84.287109375, 95.3173828125, 95.361328125}},
{"8x9c6f1cjkn", []float64{42.4184060097, 42.4184073508, -154.915576279, -154.915574938}},
{"5dr", []float64{-77.34375, -75.9375, -12.65625, -11.25}},
{"022q4", []float64{-87.5390625, -87.4951171875, -168.310546875, -168.266601562}},
{"52", []float64{-90.0, -84.375, -33.75, -22.5}},
{"s0j8hb", []float64{0.0, 0.0054931640625, 7.94311523438, 7.9541015625}},
{"58ygg9vn9nj", []float64{-85.1113092899, -85.1113079488, -12.8470878303, -12.8470864892}},
{"ztzqs1", []float64{78.4918212891, 78.4973144531, 167.87109375, 167.882080078}},
{"n5x", []float64{-70.3125, -68.90625, 99.84375, 101.25}},
{"jh593g3fsf", []float64{-67.261980772, -67.2619754076, 50.001386404, 50.0013971329}},
{"8vjg52364", []float64{28.6540603638, 28.6541032791, -138.01943779, -138.019394875}},
{"dqeh1", []float64{37.265625, 37.3095703125, -74.4873046875, -74.443359375}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"m1mjx", []float64{-37.001953125, -36.9580078125, 52.3388671875, 52.3828125}},
{"w4fr", []float64{16.69921875, 16.875, 93.1640625, 93.515625}},
{"k2v0kj", []float64{-40.7098388672, -40.7043457031, 18.45703125, 18.4680175781}},
{"ytmdb30u", []float64{75.0208282471, 75.0209999084, 120.246391296, 120.246734619}},
{"wzuhv", []float64{44.4287109375, 44.47265625, 129.594726562, 129.638671875}},
{"84m2ue733hx1", []float64{12.8061776049, 12.8061777726, -172.414918095, -172.41491776}},
{"02", []float64{-90.0, -84.375, -168.75, -157.5}},
{"x8j5vd68s", []float64{0.671625137329, 0.671668052673, 164.776554108, 164.776597023}},
{"7k", []float64{-22.5, -16.875, -33.75, -22.5}},
{"4xtrffmbtxr", []float64{-46.4377109706, -46.4377096295, -59.9881960452, -59.9881947041}},
{"k8x", []float64{-42.1875, -40.78125, 32.34375, 33.75}},
{"yyxsh", []float64{82.265625, 82.3095703125, 134.47265625, 134.516601562}},
{"f2rqpzxu", []float64{47.502822876, 47.5029945374, -68.2034683228, -68.203125}},
{"d6h4jg", []float64{11.6180419922, 11.6235351562, -72.8723144531, -72.861328125}},
{"e7d", []float64{19.6875, 21.09375, -30.9375, -29.53125}},
{"bbs0tcr5wc9", []float64{47.9078659415, 47.9078672826, -140.362410396, -140.362409055}},
{"7fuf", []float64{-29.1796875, -29.00390625, -4.5703125, -4.21875}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"tt9p6y", []float64{32.2448730469, 32.2503662109, 69.0270996094, 69.0380859375}},
{"xypsv42g6pr", []float64{34.5979173481, 34.5979186893, 179.517726749, 179.51772809}},
{"p4gvh3sr97h", []float64{-73.6428004503, -73.6427991092, 140.466100574, 140.466101915}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"6ecy4rk", []float64{-22.8117370605, -22.8103637695, -64.9346923828, -64.9333190918}},
{"045xe38uj", []float64{-77.4227142334, -77.4226713181, -174.934058189, -174.934015274}},
{"xyunrc48", []float64{39.0728759766, 39.0730476379, 174.719009399, 174.719352722}},
{"enm03", []float64{35.2001953125, 35.244140625, -37.9248046875, -37.880859375}},
{"n7r9k6ecbhm", []float64{-71.4849673212, -71.4849659801, 111.988799125, 111.988800466}},
{"2bpxnf2", []float64{-43.7571716309, -43.7557983398, -135.406494141, -135.40512085}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"vs65p7h9m", []float64{69.4502878189, 69.4503307343, 70.6374979019, 70.6375408173}},
{"j6vt6yyjmms", []float64{-73.5703888535, -73.5703875124, 64.1136950254, 64.1136963665}},
{"pp7z8s7", []float64{-47.8770446777, -47.8756713867, 140.299530029, 140.30090332}},
{"skxsqyzdgn", []float64{26.0971534252, 26.0971587896, 22.103934288, 22.1039450169}},
{"y1x7mdy", []float64{54.0238952637, 54.0252685547, 100.445251465, 100.446624756}},
{"ck9n9vptne", []float64{71.4834183455, 71.4834237099, -122.256267071, -122.256256342}},
{"fq7u12930mgt", []float64{80.862324927, 80.8623250946, -73.4198988229, -73.4198984876}},
{"zs50j6", []float64{67.5109863281, 67.5164794922, 161.949462891, 161.960449219}},
{"9jccmq0j", []float64{32.5972938538, 32.5974655151, -132.308349609, -132.308006287}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"hhtn7", []float64{-63.5888671875, -63.544921875, 7.1630859375, 7.20703125}},
{"480spdnxu", []float64{-89.2845582962, -89.2845153809, -66.4581871033, -66.4581441879}},
{"ghz87yh", []float64{71.7956542969, 71.7970275879, -34.2828369141, -34.281463623}},
{"cf", []float64{56.25, 61.875, -101.25, -90.0}},
{"gd1n5pnc8p7", []float64{57.3434360325, 57.3434373736, -20.9526403248, -20.9526389837}},
{"hef9dvw3q", []float64{-68.6121511459, -68.6121082306, 26.1453151703, 26.1453580856}},
{"wbjwhe8rkyf0", []float64{1.07519432902, 1.07519449666, 131.682678759, 131.682679094}},
{"pndb3wg", []float64{-53.3564758301, -53.3551025391, 138.937225342, 138.938598633}},
{"bh2k52ewybs", []float64{69.6132829785, 69.6132843196, -179.500513673, -179.500512332}},
{"t4r", []float64{12.65625, 14.0625, 54.84375, 56.25}},
{"s7215", []float64{18.45703125, 18.5009765625, 11.3818359375, 11.42578125}},
{"u8", []float64{45.0, 50.625, 22.5, 33.75}},
{"f2", []float64{45.0, 50.625, -78.75, -67.5}},
{"3hwb5r0etbt", []float64{-19.6484443545, -19.6484430134, -125.36405012, -125.364048779}},
{"wendsfh63", []float64{17.3258256912, 17.3258686066, 121.855244637, 121.855287552}},
{"90n", []float64{0.0, 1.40625, -126.5625, -125.15625}},
{"e5mx13zs4t2", []float64{19.5220465958, 19.5220479369, -37.2002863884, -37.2002850473}},
{"6kngstqxn", []float64{-21.854724884, -21.8546819687, -69.0508747101, -69.0508317947}},
{"rgs", []float64{-25.3125, -23.90625, 174.375, 175.78125}},
{"sbd1n1z90", []float64{2.99806594849, 2.99810886383, 36.8364715576, 36.836514473}},
{"693c7m26y6", []float64{-37.7197015285, -37.7196961641, -64.8956286907, -64.8956179619}},
{"ynu", []float64{82.96875, 84.375, 95.625, 97.03125}},
{"68t74uch2444", []float64{-41.6333230957, -41.6333229281, -59.9949619174, -59.9949615821}},
{"jcmv2zscc", []float64{-82.0043992996, -82.0043563843, 86.875462532, 86.8755054474}},
{"3c", []float64{-39.375, -33.75, -101.25, -90.0}},
{"r76ymc", []float64{-25.6146240234, -25.6091308594, 150.369873047, 150.380859375}},
{"kj9vj024cd", []float64{-13.1817376614, -13.1817322969, 2.68072843552, 2.68073916435}},
{"scr", []float64{7.03125, 8.4375, 43.59375, 45.0}},
{"57g8wvk", []float64{-68.7895202637, -68.7881469727, -28.5260009766, -28.5246276855}},
{"8qde6", []float64{37.1337890625, 37.177734375, -165.146484375, -165.102539062}},
{"7ve260", []float64{-14.0185546875, -14.0130615234, -6.591796875, -6.58081054688}},
{"trcxzhy6", []float64{44.9824905396, 44.9826622009, 58.6755752563, 58.6759185791}},
{"syqw7fyhx", []float64{36.2707614899, 36.2708044052, 43.0639600754, 43.0640029907}},
{"tb0", []float64{0.0, 1.40625, 78.75, 80.15625}},
{"dttugd1xtj5p", []float64{31.7847627215, 31.7847628891, -59.2579753697, -59.2579750344}},
{"v899zsbyh7f2", []float64{48.1472598016, 48.1472599693, 69.9401802197, 69.940180555}},
{"8e", []float64{16.875, 22.5, -157.5, -146.25}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"ny4017r8x", []float64{-56.2320613861, -56.2320184708, 126.628031731, 126.628074646}},
{"8ued", []float64{25.6640625, 25.83984375, -141.328125, -140.9765625}},
{"bqpsbj0h9p1t", []float64{79.6132376231, 79.6132377908, -158.203080073, -158.203079738}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"b5m", []float64{63.28125, 64.6875, -172.96875, -171.5625}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"w722c", []float64{18.4130859375, 18.45703125, 101.645507812, 101.689453125}},
{"ey", []float64{33.75, 39.375, -11.25, 0.0}},
{"6ndrutd", []float64{-7.04498291016, -7.04360961914, -86.6354370117, -86.6340637207}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"47jtykh", []float64{-72.0922851562, -72.0909118652, -70.7354736328, -70.7341003418}},
{"krb5qvkb", []float64{-0.806121826172, -0.805950164795, 11.5531539917, 11.5534973145}},
{"mtpgnes7uugs", []float64{-16.3277602941, -16.3277601264, 78.6901270598, 78.6901273951}},
{"6062z", []float64{-43.4619140625, -43.41796875, -86.5283203125, -86.484375}},
{"241quukp", []float64{-32.5389289856, -32.5387573242, -178.027954102, -178.027610779}},
{"g4m49", []float64{58.095703125, 58.1396484375, -37.9248046875, -37.880859375}},
{"wwq", []float64{35.15625, 36.5625, 120.9375, 122.34375}},
{"beujb1c", []float64{67.1141052246, 67.1154785156, -151.873626709, -151.872253418}},
{"h4", []float64{-78.75, -73.125, 0.0, 11.25}},
{"69xq1n9v", []float64{-35.4712486267, -35.4710769653, -57.2583389282, -57.2579956055}},
{"cjpu", []float64{73.828125, 74.00390625, -124.1015625, -123.75}},
{"pmks2611hw", []float64{-59.7104895115, -59.7104841471, 152.590677738, 152.590688467}},
{"zd78cuj300z8", []float64{57.8102342784, 57.8102344461, 162.505999133, 162.505999468}},
{"g9h460p7719", []float64{51.0210737586, 51.0210750997, -16.777022928, -16.7770215869}},
{"crncwf7g0c", []float64{84.6515518427, 84.6515572071, -113.955999613, -113.955988884}},
{"mp4", []float64{-5.625, -4.21875, 47.8125, 49.21875}},
{"3ghgz3gsjgr", []float64{-27.4555031955, -27.4555018544, -94.2466463149, -94.2466449738}},
{"m2g9", []float64{-40.60546875, -40.4296875, 61.171875, 61.5234375}},
{"ngt309w", []float64{-70.1284790039, -70.1271057129, 131.163024902, 131.164398193}},
{"hgn9u3537p", []float64{-72.8116375208, -72.8116321564, 43.08198452, 43.0819952488}},
{"6spnhu", []float64{-21.4233398438, -21.4178466797, -57.4475097656, -57.4365234375}},
{"z22v4r0d82", []float64{47.3240375519, 47.3240429163, 147.404261827, 147.404272556}},
{"ytypn", []float64{78.57421875, 78.6181640625, 121.201171875, 121.245117188}},
{"qstc", []float64{-19.51171875, -19.3359375, 120.5859375, 120.9375}},
{"y8r48c5", []float64{46.8511962891, 46.8525695801, 122.380828857, 122.382202148}},
{"uq2xybqqg", []float64{81.5210866928, 81.5211296082, 12.2584676743, 12.2585105896}},
{"95xm8dtz5u42", []float64{20.6692528725, 20.6692530401, -124.77465447, -124.774654135}},
{"x0uqxwj", []float64{5.39428710938, 5.39566040039, 141.313018799, 141.31439209}},
{"0kve9v", []float64{-62.6385498047, -62.6330566406, -160.938720703, -160.927734375}},
{"szwv1er87", []float64{43.0843019485, 43.0843448639, 43.3185338974, 43.3185768127}},
{"88zscymedp", []float64{5.08868157864, 5.08868694305, -146.868581772, -146.868571043}},
{"pd", []float64{-78.75, -73.125, 157.5, 168.75}},
{"g6", []float64{56.25, 61.875, -33.75, -22.5}},
{"3pg6r201vv51", []float64{-1.01041479036, -1.01041462272, -130.110833198, -130.110832863}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"g02", []float64{46.40625, 47.8125, -45.0, -43.59375}},
{"jx", []float64{-50.625, -45.0, 67.5, 78.75}},
{"zksxs2nz2j3", []float64{71.6321320832, 71.6321334243, 152.774163634, 152.774164975}},
{"erqyg8ff4d", []float64{41.9722473621, 41.9722527266, -24.1001200676, -24.1001093388}},
{"n3wm1r3p4jev", []float64{-80.6425363384, -80.6425361708, 110.095458291, 110.095458627}},
{"sqfbq13pjp9", []float64{38.0208036304, 38.0208049715, 15.3824485838, 15.3824499249}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"y0", []float64{45.0, 50.625, 90.0, 101.25}},
{"04q3tpcc68bq", []float64{-77.0372864977, -77.03728633, -170.988700055, -170.988699719}},
{"81", []float64{5.625, 11.25, -180.0, -168.75}},
{"nj", []float64{-61.875, -56.25, 90.0, 101.25}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"8bxs0rfgp3n7", []float64{3.55871787295, 3.55871804059, -135.688042603, -135.688042268}},
{"j0rm6", []float64{-87.6708984375, -87.626953125, 55.283203125, 55.3271484375}},
{"86v9", []float64{15.64453125, 15.8203125, -161.015625, -160.6640625}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"nmcsv", []float64{-56.8212890625, -56.77734375, 103.579101562, 103.623046875}},
{"cqytkn1d56", []float64{83.9249145985, 83.9249199629, -114.431394339, -114.43138361}},
{"jfpcxxud86r9", []float64{-78.4433147125, -78.4433145449, 89.9842279404, 89.9842282757}},
{"zf8cm8fkrg", []float64{59.2870920897, 59.2870974541, 170.049809217, 170.049819946}},
{"98x", []float64{2.8125, 4.21875, -102.65625, -101.25}},
{"e5mbzuu", []float64{18.4391784668, 18.4405517578, -36.5679931641, -36.566619873}},
{"03g5kqkcp", []float64{-79.5504570007, -79.5504140854, -164.337658882, -164.337615967}},
{"b362q0b79x2", []float64{52.0799548924, 52.0799562335, -165.321857929, -165.321856588}},
{"m3pq", []float64{-38.3203125, -38.14453125, 66.4453125, 66.796875}},
{"6564m2w5b0", []float64{-26.3198518753, -26.3198465109, -86.9485473633, -86.9485366344}},
{"m9puw71wrf", []float64{-38.5664212704, -38.566415906, 78.6754882336, 78.6754989624}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"9t34u", []float64{30.0146484375, 30.05859375, -110.91796875, -110.874023438}},
{"hewq3", []float64{-69.2138671875, -69.169921875, 31.3330078125, 31.376953125}},
{"2m2uy1tgg0", []float64{-14.6249055862, -14.6249002218, -167.423615456, -167.423604727}},
{"qu8dxgebd9wz", []float64{-19.22872575, -19.2287255824, 124.798967354, 124.798967689}},
{"scwmj", []float64{9.31640625, 9.3603515625, 42.7587890625, 42.802734375}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"zm02460g", []float64{73.1365013123, 73.1366729736, 146.701469421, 146.701812744}},
{"cgtd84ky", []float64{65.1403427124, 65.1405143738, -93.5091018677, -93.5087585449}},
{"eq", []float64{33.75, 39.375, -33.75, -22.5}},
{"ht26dn8h", []float64{-59.9929046631, -59.9927330017, 22.939453125, 22.9397964478}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"j70dzsu", []float64{-72.6155090332, -72.6141357422, 57.2882080078, 57.2895812988}},
{"y241djby0dk", []float64{45.2962996066, 45.2963009477, 104.151447415, 104.151448756}},
{"un9", []float64{81.5625, 82.96875, 1.40625, 2.8125}},
{"2kzdj9", []float64{-17.9241943359, -17.9187011719, -157.961425781, -157.950439453}},
{"m7b3gj70", []float64{-23.5697937012, -23.5696220398, 56.7375183105, 56.7378616333}},
{"vvpmy3", []float64{74.1412353516, 74.1467285156, 89.2199707031, 89.2309570312}},
{"ym0n0", []float64{74.1796875, 74.2236328125, 101.25, 101.293945312}},
{"k6", []float64{-33.75, -28.125, 11.25, 22.5}},
{"d96nj5sqmjfn", []float64{8.10626830906, 8.10626847669, -64.4617196918, -64.4617193565}},
{"9yfrdwk", []float64{39.3214416504, 39.3228149414, -97.9705810547, -97.9692077637}},
{"8vj712jd0kv", []float64{28.6527125537, 28.6527138948, -138.804685324, -138.804683983}},
{"bk86yhd", []float64{70.8206176758, 70.8219909668, -168.132019043, -168.130645752}},
{"6e3pb2s87q10", []float64{-25.3536236286, -25.353623461, -66.0764430463, -66.0764427111}},
{"nxbn2n7yn", []float64{-45.2722549438, -45.2722120285, 112.505407333, 112.505450249}},
{"cg4n", []float64{62.9296875, 63.10546875, -98.4375, -98.0859375}},
{"4de6s37sk467", []float64{-75.4904382862, -75.4904381186, -62.7379387245, -62.7379383892}},
{"pzegg", []float64{-47.1533203125, -47.109375, 174.155273438, 174.19921875}},
{"xytyx62", []float64{37.7174377441, 37.7188110352, 177.154541016, 177.155914307}},
{"2x4", []float64{-5.625, -4.21875, -154.6875, -153.28125}},
{"j11m8j1eywcu", []float64{-83.3800566941, -83.3800565265, 46.7601537332, 46.7601540685}},
{"k2xw57e", []float64{-41.1135864258, -41.1122131348, 21.9438171387, 21.9451904297}},
{"2q74", []float64{-9.4921875, -9.31640625, -164.53125, -164.1796875}},
{"9tp1960t8", []float64{28.4006023407, 28.400645256, -102.600631714, -102.600588799}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"2b2mwb", []float64{-42.626953125, -42.6214599609, -145.601806641, -145.590820312}},
{"b7ts80s", []float64{65.481262207, 65.482635498, -161.010131836, -161.008758545}},
{"10x0", []float64{-87.1875, -87.01171875, -125.15625, -124.8046875}},
{"c5p9s65qqch", []float64{62.1507364511, 62.1507377923, -124.261599183, -124.261597842}},
{"n1j", []float64{-84.375, -82.96875, 97.03125, 98.4375}},
{"gjy9e2g7hcvc", []float64{77.6120662875, 77.6120664552, -35.7118779793, -35.7118776441}},
{"d0nngw26hnc0", []float64{1.22123524547, 1.2212354131, -81.408175081, -81.4081747457}},
{"sqqh99ewn", []float64{35.9565353394, 35.9565782547, 19.7584819794, 19.7585248947}},
{"27", []float64{-28.125, -22.5, -168.75, -157.5}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"uv", []float64{73.125, 78.75, 33.75, 45.0}},
{"nu6", []float64{-66.09375, -64.6875, 126.5625, 127.96875}},
{"hjg9s97mjbfp", []float64{-57.3848481663, -57.3848479986, 5.12434154749, 5.12434188277}},
{"cekd", []float64{63.6328125, 63.80859375, -106.171875, -105.8203125}},
{"fx2s", []float64{86.484375, 86.66015625, -66.796875, -66.4453125}},
{"zxx5n4wh37", []float64{87.7293223143, 87.7293276787, 167.615715265, 167.615725994}},
{"k0redc", []float64{-42.9730224609, -42.9675292969, 10.6677246094, 10.6787109375}},
{"xhj810f1r7", []float64{22.504350543, 22.5043559074, 142.781378031, 142.78138876}},
{"9j", []float64{28.125, 33.75, -135.0, -123.75}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"z59900erg", []float64{64.8673582077, 64.867401123, 137.113966942, 137.114009857}},
{"6dnccesgx8ts", []float64{-33.4225525707, -33.4225524031, -57.9350421578, -57.9350418225}},
{"jx", []float64{-50.625, -45.0, 67.5, 78.75}},
{"312y4y56", []float64{-36.8807601929, -36.8805885315, -133.819999695, -133.819656372}},
{"32vk3g9np", []float64{-40.013923645, -40.0138807297, -116.288609505, -116.288566589}},
{"kn39", []float64{-9.66796875, -9.4921875, 2.109375, 2.4609375}},
{"eed4ybdn4mpp", []float64{20.1747029833, 20.174703151, -19.3880166113, -19.3880162761}},
{"st", []float64{28.125, 33.75, 22.5, 33.75}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"0t8kw4xyhu5x", []float64{-58.2566988654, -58.2566986978, -156.873914078, -156.873913743}},
{"dcy", []float64{9.84375, 11.25, -47.8125, -46.40625}},
{"p3r", []float64{-82.96875, -81.5625, 156.09375, 157.5}},
{"wdc0w9tbd6", []float64{15.5649769306, 15.564982295, 114.199887514, 114.199898243}},
{"j3jc", []float64{-84.19921875, -84.0234375, 64.3359375, 64.6875}},
{"k5cw3t", []float64{-22.7801513672, -22.7746582031, 2.17529296875, 2.18627929688}},
{"pd6m", []float64{-76.46484375, -76.2890625, 160.6640625, 161.015625}},
{"hnfqkjqrqnh", []float64{-50.9025013447, -50.9025000036, 3.34868967533, 3.34869101644}},
{"6qkwd", []float64{-8.701171875, -8.6572265625, -72.333984375, -72.2900390625}},
{"1q2x", []float64{-53.61328125, -53.4375, -123.046875, -122.6953125}},
{"3nps1et4nde", []float64{-10.527292192, -10.5272908509, -124.380057603, -124.380056262}},
{"gq2c8qnkx2", []float64{80.4536533356, 80.4536587, -32.6754319668, -32.6754212379}},
{"q7d", []float64{-25.3125, -23.90625, 104.0625, 105.46875}},
{"560", []float64{-78.75, -77.34375, -33.75, -32.34375}},
{"j855ffmuxv4s", []float64{-89.3276607245, -89.3276605569, 71.8478319794, 71.8478323147}},
{"sumfzt8z4", []float64{24.4210624695, 24.4211053848, 42.1666431427, 42.166686058}},
{"bje9d2n0", []float64{76.201171875, 76.2013435364, -174.971008301, -174.970664978}},
{"y943", []float64{50.80078125, 50.9765625, 115.6640625, 116.015625}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"gmn", []float64{73.125, 74.53125, -25.3125, -23.90625}},
{"1djwe5s", []float64{-77.5881958008, -77.5868225098, -104.628295898, -104.626922607}},
{"tb5wv3", []float64{1.19201660156, 1.19750976562, 83.9025878906, 83.9135742188}},
{"58h3qnfv9m", []float64{-89.7422236204, -89.742218256, -16.2559354305, -16.2559247017}},
{"f0du0sguugkg", []float64{48.5425508581, 48.5425510257, -86.1054797843, -86.105479449}},
{"h6zyvwk", []float64{-73.3103942871, -73.3090209961, 22.3956298828, 22.3970031738}},
{"vg2nu", []float64{64.4677734375, 64.51171875, 78.92578125, 78.9697265625}},
{"j7r7de", []float64{-71.0870361328, -71.0815429688, 66.5551757812, 66.5661621094}},
{"hjshn", []float64{-58.359375, -58.3154296875, 5.888671875, 5.9326171875}},
{"46khrs1t", []float64{-76.5738487244, -76.573677063, -72.7933502197, -72.793006897}},
{"g5p5p", []float64{62.40234375, 62.4462890625, -34.8486328125, -34.8046875}},
{"7pxgy2sf", []float64{-2.15023040771, -2.15005874634, -33.8203811646, -33.8200378418}},
{"vrz", []float64{88.59375, 90.0, 66.09375, 67.5}},
{"bry3ngyuq", []float64{88.7908601761, 88.7909030914, -159.654779434, -159.654736519}},
{"9cu9t1g3cs", []float64{10.1173567772, 10.1173621416, -94.6976208687, -94.6976101398}},
{"82", []float64{0.0, 5.625, -168.75, -157.5}},
{"m9yzmzsmxx", []float64{-33.8396555185, -33.8396501541, 77.2510313988, 77.2510421276}},
{"4wv4wwxb2dr2", []float64{-51.5560363233, -51.5560361557, -60.1724312827, -60.1724309474}},
{"j7ufy8", []float64{-68.4228515625, -68.4173583984, 63.2153320312, 63.2263183594}},
{"hzrjzg9xjj", []float64{-48.1875532866, -48.1875479221, 43.9366006851, 43.936611414}},
{"z5eh3ghtm7", []float64{65.4519671202, 65.4519724846, 139.302059412, 139.302070141}},
{"6sjhet2cts", []float64{-21.6798663139, -21.6798609495, -60.3136754036, -60.3136646748}},
{"vjyn6v", []float64{78.4698486328, 78.4753417969, 53.5583496094, 53.5693359375}},
{"hysxqy94", []float64{-52.1270370483, -52.126865387, 40.3761291504, 40.3764724731}},
{"8yd4qxm0d", []float64{36.9979190826, 36.997961998, -143.144903183, -143.144860268}},
{"w43b42f", []float64{12.660369873, 12.6617431641, 92.5625610352, 92.5639343262}},
{"7suc291x", []float64{-18.0548286438, -18.0546569824, -15.7962799072, -15.7959365845}},
{"pq", []float64{-56.25, -50.625, 146.25, 157.5}},
{"frd", []float64{87.1875, 88.59375, -75.9375, -74.53125}},
{"r2", []float64{-45.0, -39.375, 146.25, 157.5}},
{"5rs1x50fe5m", []float64{-47.531902045, -47.5319007039, -27.8162173927, -27.8162160516}},
{"zjv30682s21k", []float64{77.5333506614, 77.533350829, 142.394326217, 142.394326553}},
{"zbbtmw", []float64{50.1745605469, 50.1800537109, 169.694824219, 169.705810547}},
{"e56k", []float64{18.984375, 19.16015625, -41.8359375, -41.484375}},
{"v7dzp", []float64{65.91796875, 65.9619140625, 60.4248046875, 60.46875}},
{"n2z8qt2hnzss", []float64{-85.707738027, -85.7077378593, 112.082815245, 112.08281558}},
{"zbp8bt07bb5", []float64{45.159945488, 45.1599468291, 179.319227189, 179.31922853}},
{"s551t", []float64{17.138671875, 17.1826171875, 4.4384765625, 4.482421875}},
{"5zp7trjp", []float64{-49.9701118469, -49.9699401855, -0.817108154297, -0.816764831543}},
{"81n2z7dz", []float64{5.77726364136, 5.77743530273, -170.888557434, -170.888214111}},
{"dw", []float64{33.75, 39.375, -67.5, -56.25}},
{"vvw35", []float64{76.11328125, 76.1572265625, 87.6708984375, 87.71484375}},
{"zhtuc3b8bg5n", []float64{71.1572198197, 71.1572199874, 143.141591996, 143.141592331}},
{"042w4qgx", []float64{-76.2507820129, -76.2506103516, -179.193191528, -179.192848206}},
{"9sntb7", []float64{23.5272216797, 23.5327148438, -103.348388672, -103.337402344}},
{"wt6x5nxk", []float64{30.7981109619, 30.7982826233, 116.157417297, 116.15776062}},
{"spzwehn", []float64{44.7583007812, 44.7596740723, 10.6869506836, 10.6883239746}},
{"hv70sbbpw64", []float64{-60.3754413128, -60.3754399717, 38.1777611375, 38.1777624786}},
{"wxt", []float64{42.1875, 43.59375, 119.53125, 120.9375}},
{"7dvqsskj8", []float64{-28.3643817902, -28.3643388748, -14.9139404297, -14.9138975143}},
{"wm22", []float64{29.53125, 29.70703125, 101.6015625, 101.953125}},
{"5t4kf", []float64{-61.0400390625, -60.99609375, -19.248046875, -19.2041015625}},
{"5sy67z47fz1", []float64{-62.846608758, -62.8466074169, -13.542933315, -13.5429319739}},
{"c36nw2msv0t", []float64{53.1760194898, 53.1760208309, -120.655067414, -120.655066073}},
{"311v", []float64{-38.49609375, -38.3203125, -132.5390625, -132.1875}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"xcykeys", []float64{10.6704711914, 10.6718444824, 177.709350586, 177.710723877}},
{"gtmvbbv", []float64{75.5461120605, 75.5474853516, -14.3742370605, -14.3728637695}},
{"5n1", []float64{-56.25, -54.84375, -43.59375, -42.1875}},
{"y08uf", []float64{48.6474609375, 48.69140625, 91.142578125, 91.1865234375}},
{"ds4", []float64{22.5, 23.90625, -64.6875, -63.28125}},
{"1t49n9", []float64{-61.6937255859, -61.6882324219, -108.698730469, -108.687744141}},
{"v81d55x758", []float64{45.3713035583, 45.3713089228, 69.7513175011, 69.7513282299}},
{"39j80vxmjh", []float64{-39.3439078331, -39.3439024687, -104.722495079, -104.72248435}},
{"x3vd66k2vj46", []float64{10.251773335, 10.2517735027, 154.089306034, 154.089306369}},
{"vg", []float64{61.875, 67.5, 78.75, 90.0}},
{"06q04jnnuyz", []float64{-77.3150892556, -77.3150879145, -160.216156393, -160.216155052}},
{"cvws", []float64{76.640625, 76.81640625, -92.109375, -91.7578125}},
{"9d5j84gmgbv", []float64{12.2328941524, 12.2328954935, -108.276619166, -108.276617825}},
{"mjg", []float64{-12.65625, -11.25, 49.21875, 50.625}},
{"k7gspu", []float64{-23.1811523438, -23.1756591797, 16.5124511719, 16.5234375}},
{"13nrmggfx", []float64{-83.0795574188, -83.0795145035, -114.702801704, -114.702758789}},
{"ypj759", []float64{84.9078369141, 84.9133300781, 97.5366210938, 97.5476074219}},
{"hhy3z0zjn", []float64{-62.9686546326, -62.9686117172, 9.10655021667, 9.10659313202}},
{"b0xhjv", []float64{48.5430908203, 48.5485839844, -169.903564453, -169.892578125}},
{"xucn7t7t11", []float64{27.8470855951, 27.8470909595, 170.314908028, 170.314918756}},
{"f0yqwpw6", []float64{50.4028701782, 50.4030418396, -80.9386825562, -80.9383392334}},
{"hcud164f7z", []float64{-79.7932773829, -79.7932720184, 40.1369941235, 40.1370048523}},
{"k7b09t", []float64{-23.7908935547, -23.7854003906, 11.3159179688, 11.3269042969}},
{"gzttr69", []float64{88.1240844727, 88.1254577637, -3.19564819336, -3.19427490234}},
{"z1", []float64{50.625, 56.25, 135.0, 146.25}},
{"mt", []float64{-16.875, -11.25, 67.5, 78.75}},
{"vgpm3pe", []float64{62.839050293, 62.840423584, 88.9933776855, 88.9947509766}},
{"xc2xk", []float64{8.3056640625, 8.349609375, 169.62890625, 169.672851562}},
{"7gpegjrrn", []float64{-27.4357795715, -27.4357366562, -0.561075210571, -0.561032295227}},
{"5m00s41bg21m", []float64{-61.7759934627, -61.775993295, -33.5716743395, -33.5716740042}},
{"ybz9un", []float64{49.5593261719, 49.5648193359, 134.47265625, 134.483642578}},
{"5cxhpu5jy6y", []float64{-80.8364005387, -80.8363991976, -1.06127768755, -1.06127634645}},
{"0gk7412", []float64{-71.1845397949, -71.1831665039, -140.185546875, -140.184173584}},
{"zj2", []float64{74.53125, 75.9375, 135.0, 136.40625}},
{"6jj5vt6zv5", []float64{-16.1856347322, -16.1856293678, -82.7230596542, -82.7230489254}},
{"mdp7x0cy8x", []float64{-33.1294924021, -33.1294870377, 78.0053544044, 78.0053651333}},
{"515", []float64{-84.375, -82.96875, -40.78125, -39.375}},
{"rcrpxqxje", []float64{-36.613740921, -36.6136980057, 178.922095299, 178.922138214}},
{"ydd5n3qr6tu", []float64{59.5979855955, 59.5979869366, 115.595853925, 115.595855266}},
{"hd", []float64{-78.75, -73.125, 22.5, 33.75}},
{"rj4uc", []float64{-16.0400390625, -15.99609375, 138.911132812, 138.955078125}},
{"0r3x", []float64{-47.98828125, -47.8125, -166.640625, -166.2890625}},
{"490rp8g4y", []float64{-83.1399393082, -83.1398963928, -66.8144702911, -66.8144273758}},
{"v19nuj", []float64{54.6514892578, 54.6569824219, 46.58203125, 46.5930175781}},
{"wrg3hx7", []float64{43.8093566895, 43.8107299805, 106.022186279, 106.02355957}},
{"ntzjry4", []float64{-56.7004394531, -56.6990661621, 122.687072754, 122.688446045}},
{"3s5c85t2d", []float64{-22.2170162201, -22.2169733047, -107.219266891, -107.219223976}},
{"wyy", []float64{37.96875, 39.375, 132.1875, 133.59375}},
{"6wk17", []float64{-9.6240234375, -9.580078125, -61.7431640625, -61.69921875}},
{"7m5wqccz7meg", []float64{-15.7654795982, -15.7654794306, -28.5289463773, -28.5289460421}},
{"sk8n5z1qd2", []float64{26.4067554474, 26.4067608118, 11.4166080952, 11.416618824}},
{"kw8hyjjj384", []float64{-7.57417201996, -7.57417067885, 22.7706053853, 22.7706067264}},
{"14rw6bt3vx9v", []float64{-76.2420291267, -76.2420289591, -124.324827231, -124.324826896}},
{"9cdu", []float64{9.140625, 9.31640625, -97.3828125, -97.03125}},
{"0cptj3e6crgm", []float64{-83.4873395227, -83.4873393551, -135.467890911, -135.467890576}},
{"0m", []float64{-61.875, -56.25, -168.75, -157.5}},
{"mx", []float64{-5.625, 0.0, 67.5, 78.75}},
{"p1sfuc", []float64{-81.0736083984, -81.0681152344, 141.888427734, 141.899414062}},
{"03nduncm", []float64{-83.8536643982, -83.8534927368, -159.431877136, -159.431533813}},
{"0m0j", []float64{-60.99609375, -60.8203125, -168.75, -168.3984375}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"fn", []float64{78.75, 84.375, -90.0, -78.75}},
{"srddrb0", []float64{42.5830078125, 42.5843811035, 15.1062011719, 15.1075744629}},
{"wpbf1dnj", []float64{43.957157135, 43.9573287964, 91.1288452148, 91.1291885376}},
{"nv8k1f98", []float64{-58.3456420898, -58.3454704285, 124.180526733, 124.180870056}},
{"7upj4ct1", []float64{-21.6126823425, -21.6125106812, -1.27853393555, -1.27819061279}},
{"k312nzxpwm", []float64{-39.3324869871, -39.3324816227, 13.3143246174, 13.3143353462}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"qure08zn2n", []float64{-20.5611813068, -20.5611759424, 134.328460693, 134.328471422}},
{"m1", []float64{-39.375, -33.75, 45.0, 56.25}},
{"q7wbtv20e", []float64{-25.195684433, -25.1956415176, 110.995001793, 110.995044708}},
{"fsqx0ywr3s", []float64{70.1736903191, 70.1736956835, -58.3177685738, -58.3177578449}},
{"rn9k7k2927vd", []float64{-7.66684871167, -7.66684854403, 136.901339516, 136.901339851}},
{"8ypg", []float64{34.27734375, 34.453125, -135.3515625, -135.0}},
{"42ru", []float64{-87.890625, -87.71484375, -67.8515625, -67.5}},
{"z3efhtnprhw", []float64{53.8177970052, 53.8177983463, 151.729739606, 151.729740947}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"fu", []float64{67.5, 73.125, -56.25, -45.0}},
{"uz9", []float64{87.1875, 88.59375, 35.15625, 36.5625}},
{"bqdnsr4y4", []float64{82.7445602417, 82.744603157, -165.746870041, -165.746827126}},
{"rshpnngvd", []float64{-21.231508255, -21.2314653397, 163.393907547, 163.393950462}},
{"4egtrvjfe", []float64{-67.9555034637, -67.9554605484, -62.2295236588, -62.2294807434}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"r2m2yc", []float64{-43.4564208984, -43.4509277344, 153.929443359, 153.940429688}},
{"v9vps2z7", []float64{56.1667442322, 56.1669158936, 74.727973938, 74.7283172607}},
{"dh", []float64{22.5, 28.125, -90.0, -78.75}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"7", []float64{-45.0, 0.0, -45.0, 0.0}},
{"rrtp", []float64{-1.58203125, -1.40625, 153.28125, 153.6328125}},
{"57wdet", []float64{-69.8455810547, -69.8400878906, -24.4555664062, -24.4445800781}},
{"sxm3rvsc1x0y", []float64{41.031399183, 41.0313993506, 30.229977183, 30.2299775183}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"fmwp70q5", []float64{77.2138023376, 77.213973999, -70.1724243164, -70.1720809937}},
{"vq3whey", []float64{81.2315368652, 81.2329101562, 58.5653686523, 58.5667419434}},
{"vm55829xumn", []float64{73.7443381548, 73.7443394959, 60.4819867015, 60.4819880426}},
{"pc", []float64{-84.375, -78.75, 168.75, 180.0}},
{"76j", []float64{-33.75, -32.34375, -26.71875, -25.3125}},
{"du5md", []float64{23.466796875, 23.5107421875, -51.591796875, -51.5478515625}},
{"0b800r9", []float64{-87.1463012695, -87.1449279785, -146.237640381, -146.23626709}},
{"r96suj", []float64{-37.1063232422, -37.1008300781, 161.19140625, 161.202392578}},
{"dqn86", []float64{33.7939453125, 33.837890625, -69.521484375, -69.4775390625}},
{"62jjysq7fun", []float64{-43.9652466774, -43.9652453363, -71.4243963361, -71.424394995}},
{"s623s4p3v", []float64{12.9312086105, 12.9312515259, 11.7875146866, 11.7875576019}},
{"j9w5", []float64{-81.03515625, -80.859375, 75.9375, 76.2890625}},
{"cku2qh5ee64", []float64{71.7852795124, 71.7852808535, -117.504816949, -117.504815608}},
{"ypmy864vvgs", []float64{86.9358202815, 86.9358216226, 98.1009525061, 98.1009538472}},
{"kwe", []float64{-8.4375, -7.03125, 26.71875, 28.125}},
{"gmq7083dvewj", []float64{75.0604587235, 75.0604588911, -24.9366608262, -24.9366604909}},
{"9er", []float64{18.28125, 19.6875, -102.65625, -101.25}},
{"5p89tmjs9j5", []float64{-47.5205630064, -47.5205616653, -44.0585620701, -44.058560729}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"ewy", []float64{37.96875, 39.375, -14.0625, -12.65625}},
{"jtgef", []float64{-56.9970703125, -56.953125, 72.509765625, 72.5537109375}},
{"9yjjw", []float64{34.716796875, 34.7607421875, -93.955078125, -93.9111328125}},
{"926", []float64{1.40625, 2.8125, -120.9375, -119.53125}},
{"bz1", []float64{84.375, 85.78125, -144.84375, -143.4375}},
{"yjjpq0ecnve", []float64{74.4023618102, 74.4023631513, 97.3003654182, 97.3003667593}},
{"w5e", []float64{19.6875, 21.09375, 94.21875, 95.625}},
{"hqcn9wtcr", []float64{-50.8527517319, -50.8527088165, 12.7303647995, 12.7304077148}},
{"qfh6xphngs", []float64{-33.2709145546, -33.2709091902, 130.039823055, 130.039833784}},
{"1he586fypp", []float64{-64.0560919046, -64.0560865402, -130.766186714, -130.766175985}},
{"4cc5sh9n3s", []float64{-79.5152020454, -79.515196681, -54.666531086, -54.6665203571}},
{"9y5wfm", []float64{34.9639892578, 34.9694824219, -96.2292480469, -96.2182617188}},
{"c97809", []float64{52.0367431641, 52.0422363281, -107.556152344, -107.545166016}},
{"k9g2nkbm3j5h", []float64{-35.1292287558, -35.1292285882, 27.3453609645, 27.3453612998}},
{"thdwugw196t", []float64{26.5185204148, 26.5185217559, 48.7326653302, 48.7326666713}},
{"34nm41n89c8v", []float64{-32.8655058704, -32.8655057028, -126.114044376, -126.11404404}},
{"buf7qgu", []float64{72.3106384277, 72.3120117188, -142.783813477, -142.782440186}},
{"mhvh0u7f4", []float64{-17.55443573, -17.5543928146, 52.0694446564, 52.0694875717}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"f0vdwj1bu", []float64{49.6857976913, 49.6858406067, -81.9993782043, -81.999335289}},
{"kcke59", []float64{-37.4359130859, -37.4304199219, 40.2319335938, 40.2429199219}},
{"9rws4p0", []float64{42.9290771484, 42.9304504395, -114.521484375, -114.520111084}},
{"fhj1u03epu", []float64{67.8095269203, 67.8095322847, -82.7905762196, -82.7905654907}},
{"13296d9gwq1", []float64{-82.734657526, -82.7346561849, -122.934338897, -122.934337556}},
{"4j", []float64{-61.875, -56.25, -90.0, -78.75}},
{"gk5u1y2", []float64{68.2374572754, 68.2388305664, -28.3996582031, -28.3982849121}},
{"9v6yrwx00", []float64{30.6655883789, 30.6656312943, -97.0436096191, -97.0435667038}},
{"mc92", []float64{-36.5625, -36.38671875, 80.5078125, 80.859375}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"vtzr1we0jh5", []float64{78.6099457741, 78.6099471152, 77.7655689418, 77.7655702829}},
{"ytmmrjr08p", []float64{75.4830640554, 75.4830694199, 120.200042725, 120.200053453}},
{"y7q525c0mgkz", []float64{63.8731999509, 63.8732001185, 109.689126424, 109.68912676}},
{"s5nc", []float64{17.05078125, 17.2265625, 9.4921875, 9.84375}},
{"wk2", []float64{23.90625, 25.3125, 101.25, 102.65625}},
{"f4beky4z04y", []float64{61.0742144287, 61.0742157698, -89.0843501687, -89.0843488276}},
{"ywdu5yj95", []float64{82.2987556458, 82.2987985611, 116.539664268, 116.539707184}},
{"n3", []float64{-84.375, -78.75, 101.25, 112.5}},
{"0334vnb6", []float64{-82.4479293823, -82.4477577209, -167.123680115, -167.123336792}},
{"xg65", []float64{18.80859375, 18.984375, 171.5625, 171.9140625}},
{"0ebmse71br", []float64{-67.9212623835, -67.921257019, -156.946552992, -156.946542263}},
{"ycwd9fc", []float64{53.8920593262, 53.8934326172, 132.968902588, 132.970275879}},
{"0z2gsvd0tfzy", []float64{-48.573201634, -48.5732014664, -144.983568527, -144.983568192}},
{"e041", []float64{0.17578125, 0.3515625, -42.1875, -41.8359375}},
{"ntzdfpdcphj7", []float64{-57.1314592101, -57.1314590424, 123.138849624, 123.138849959}},
{"jx1bfyyhgds1", []float64{-50.4552562349, -50.4552560672, 70.0901824236, 70.0901827589}},
{"dhzuvhgspbew", []float64{27.5804938003, 27.580493968, -78.8766921312, -78.8766917959}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"teyjc173t", []float64{22.1116161346, 22.11165905, 75.986123085, 75.9861660004}},
{"bg57uz4rxw5", []float64{62.5739514828, 62.5739528239, -141.467531472, -141.467530131}},
{"52dtfpdc", []float64{-86.1353874207, -86.1352157593, -30.1427078247, -30.142364502}},
{"vx1j39e", []float64{85.3060913086, 85.3074645996, 68.9762878418, 68.9776611328}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"psmpz5", []float64{-64.7149658203, -64.7094726562, 164.838867188, 164.849853516}},
{"4xr95eeee", []float64{-49.023141861, -49.0230989456, -56.7943811417, -56.7943382263}},
{"5j", []float64{-61.875, -56.25, -45.0, -33.75}},
{"kpb", []float64{-1.40625, 0.0, 0.0, 1.40625}},
{"dsub48epk", []float64{26.722741127, 26.7227840424, -60.7061576843, -60.706114769}},
{"2urtnwtdw17", []float64{-20.1787023246, -20.1787009835, -135.409665853, -135.409664512}},
{"e6s30gwjxm", []float64{14.2584782839, 14.2584836483, -27.7319276333, -27.7319169044}},
{"qtx", []float64{-14.0625, -12.65625, 122.34375, 123.75}},
{"qj0qvndweq3k", []float64{-15.651620999, -15.6516208313, 90.5748634413, 90.5748637766}},
{"ffetyh28uyj", []float64{60.0967490673, 60.0967504084, -51.0635559261, -51.063554585}},
{"z56t8nwqq7", []float64{64.2848414183, 64.2848467827, 138.52447629, 138.524487019}},
{"7h", []float64{-22.5, -16.875, -45.0, -33.75}},
{"9tuuw1pkyh", []float64{33.1410956383, 33.1411010027, -105.546426773, -105.546416044}},
{"2m", []float64{-16.875, -11.25, -168.75, -157.5}},
{"h7qt", []float64{-70.83984375, -70.6640625, 20.390625, 20.7421875}},
{"t832ztb6psn", []float64{1.57003641129, 1.57003775239, 69.5880755782, 69.5880769193}},
{"wk", []float64{22.5, 28.125, 101.25, 112.5}},
{"ndjbb8w3n", []float64{-78.6152458191, -78.6152029037, 120.616750717, 120.616793633}},
{"14pf3eqg4zd5", []float64{-78.3360836841, -78.3360835165, -124.026254117, -124.026253782}},
{"9j", []float64{28.125, 33.75, -135.0, -123.75}},
{"fr6ng34", []float64{86.9732666016, 86.9746398926, -75.7919311523, -75.7905578613}},
{"p3ggurx2c", []float64{-79.455742836, -79.4556999207, 151.720204353, 151.720247269}},
{"1h1pg1myn06", []float64{-66.1297975481, -66.129796207, -133.453757465, -133.453756124}},
{"cqsue", []float64{82.353515625, 82.3974609375, -116.938476562, -116.89453125}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"s8jkw", []float64{0.791015625, 0.8349609375, 30.146484375, 30.1904296875}},
{"67", []float64{-28.125, -22.5, -78.75, -67.5}},
{"ywe4mn", []float64{81.9909667969, 81.9964599609, 116.938476562, 116.949462891}},
{"0f5te71q9g", []float64{-77.7655917406, -77.7655863762, -141.183511019, -141.18350029}},
{"v9s6tw70swwv", []float64{53.911406938, 53.9114071056, 73.7225837633, 73.7225840986}},
{"0jbutv", []float64{-56.8377685547, -56.8322753906, -178.692626953, -178.681640625}},
{"bn271bp", []float64{80.68359375, 80.684967041, -179.561920166, -179.560546875}},
{"1vvyth", []float64{-56.4916992188, -56.4862060547, -92.9443359375, -92.9333496094}},
{"7ruk94vup", []float64{-0.59944152832, -0.599398612976, -27.7212953568, -27.7212524414}},
{"3hf", []float64{-18.28125, -16.875, -132.1875, -130.78125}},
{"741rwgds3m4k", []float64{-32.4116574973, -32.4116573296, -42.9420667514, -42.9420664161}},
{"2pye", []float64{-0.87890625, -0.703125, -170.859375, -170.5078125}},
{"2", []float64{-45.0, 0.0, -180.0, -135.0}},
{"e7", []float64{16.875, 22.5, -33.75, -22.5}},
{"f", []float64{45.0, 90.0, -90.0, -45.0}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"4c5p5ke", []float64{-83.1198120117, -83.1184387207, -51.8843078613, -51.8829345703}},
{"h7q", []float64{-71.71875, -70.3125, 19.6875, 21.09375}},
{"4fjp8", []float64{-77.431640625, -77.3876953125, -49.21875, -49.1748046875}},
{"p2cbvvvdt8", []float64{-85.6173992157, -85.6173938513, 148.971412182, 148.971422911}},
{"xxjtqz46qmm", []float64{40.3367181122, 40.3367194533, 165.534370691, 165.534372032}},
{"w1e", []float64{8.4375, 9.84375, 94.21875, 95.625}},
{"fxpg4v3e", []float64{84.9316978455, 84.9318695068, -56.4786529541, -56.4783096313}},
{"3be6u", []float64{-41.7041015625, -41.66015625, -96.50390625, -96.4599609375}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"seqqvkuphz", []float64{19.4951051474, 19.4951105118, 31.5254724026, 31.5254831314}},
{"txy2t7xx", []float64{43.7020683289, 43.7022399902, 76.5300750732, 76.530418396}},
{"s2hc2d2s", []float64{0.232772827148, 0.232944488525, 17.9523468018, 17.9526901245}},
{"8zr0n4f62k", []float64{40.7967638969, 40.7967692614, -136.139477491, -136.139466763}},
{"th1vxpfxnp9", []float64{23.5106107593, 23.5106121004, 47.7722467482, 47.7722480893}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"33", []float64{-39.375, -33.75, -123.75, -112.5}},
{"gu", []float64{67.5, 73.125, -11.25, 0.0}},
{"9vq49", []float64{29.970703125, 30.0146484375, -92.7685546875, -92.724609375}},
{"tm", []float64{28.125, 33.75, 56.25, 67.5}},
{"dpzw0p", []float64{44.6868896484, 44.6923828125, -79.453125, -79.4421386719}},
{"gwg12", []float64{83.1884765625, 83.232421875, -18.28125, -18.2373046875}},
{"b8vphv0m5k", []float64{50.4775643349, 50.4775696993, -150.259526968, -150.259516239}},
{"pgpffhw1", []float64{-72.6167106628, -72.6165390015, 179.744567871, 179.744911194}},
{"3r3w", []float64{-3.1640625, -2.98828125, -121.640625, -121.2890625}},
{"u1d", []float64{53.4375, 54.84375, 2.8125, 4.21875}},
{"mznb8v5xu6", []float64{-5.50830245018, -5.50829708576, 88.2801353931, 88.280146122}},
{"8mb57vjrex4", []float64{32.9438298941, 32.9438312352, -168.577842414, -168.577841073}},
{"zm", []float64{73.125, 78.75, 146.25, 157.5}},
{"c9ef6tm74sg", []float64{53.8623873889, 53.86238873, -107.109378129, -107.109376788}},
{"spww", []float64{43.2421875, 43.41796875, 9.140625, 9.4921875}},
{"snp97n", []float64{34.0026855469, 34.0081787109, 10.6787109375, 10.6896972656}},
{"zp9r6emsk8xx", []float64{88.4805002622, 88.4805004299, 136.875432059, 136.875432394}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"18zsh9bg", []float64{-85.0679969788, -85.0678253174, -101.754341125, -101.753997803}},
{"v28", []float64{47.8125, 49.21875, 56.25, 57.65625}},
{"4e", []float64{-73.125, -67.5, -67.5, -56.25}},
{"evn0wp56", []float64{28.2516860962, 28.2518577576, -2.5443649292, -2.54402160645}},
{"uyf9v", []float64{83.2763671875, 83.3203125, 37.4853515625, 37.529296875}},
{"d7", []float64{16.875, 22.5, -78.75, -67.5}},
{"05", []float64{-73.125, -67.5, -180.0, -168.75}},
{"ujj8", []float64{73.125, 73.30078125, 7.734375, 8.0859375}},
{"wcb7n8", []float64{10.37109375, 10.3765869141, 124.387207031, 124.398193359}},
{"r35s2y4e2", []float64{-38.5944128036, -38.5943698883, 151.208267212, 151.208310127}},
{"k", []float64{-45.0, 0.0, 0.0, 45.0}},
{"8tm3h7b1f", []float64{29.7279310226, 29.727973938, -149.930334091, -149.930291176}},
{"3xecw9gsguw3", []float64{-2.53837538883, -2.53837522119, -106.935942136, -106.9359418}},
{"hqs10v", []float64{-53.2342529297, -53.2287597656, 16.9079589844, 16.9189453125}},
{"b21g", []float64{45.52734375, 45.703125, -166.2890625, -165.9375}},
{"vphhpnjt5b", []float64{85.1119422913, 85.1119476557, 50.9403312206, 50.9403419495}},
{"kbd", []float64{-42.1875, -40.78125, 36.5625, 37.96875}},
{"2c", []float64{-39.375, -33.75, -146.25, -135.0}},
{"07ur", []float64{-67.67578125, -67.5, -162.7734375, -162.421875}},
{"8e5ky1", []float64{17.7154541016, 17.7209472656, -152.666015625, -152.655029297}},
{"k2w84t", []float64{-42.1600341797, -42.1545410156, 20.5004882812, 20.5114746094}},
{"p9t4ncex81m", []float64{-81.2014035881, -81.201402247, 164.832694083, 164.832695425}},
{"q67rduzsu6uz", []float64{-30.9984667785, -30.9984666109, 105.951650552, 105.951650888}},
{"udwkypp0v", []float64{59.936041832, 59.9360847473, 31.5625619888, 31.5626049042}},
{"pjsu1q9qg", []float64{-58.3225107193, -58.322467804, 141.7364645, 141.736507416}},
{"2kj2w9b021b", []float64{-22.4024440348, -22.4024426937, -161.081542969, -161.081541628}},
{"5k0", []float64{-67.5, -66.09375, -33.75, -32.34375}},
{"t626vs8j", []float64{13.1652259827, 13.165397644, 56.8432617188, 56.8436050415}},
{"hd0z4zr73", []float64{-77.4791479111, -77.4791049957, 23.6855363846, 23.6855792999}},
{"79gjppfekhhk", []float64{-34.2341917008, -34.2341915332, -17.9700222239, -17.9700218886}},
{"u9u", []float64{54.84375, 56.25, 28.125, 29.53125}},
{"5zbfmj3n30", []float64{-45.9808301926, -45.9808248281, -9.97416973114, -9.9741590023}},
{"1w1nt3g4t9pp", []float64{-55.0973731466, -55.0973729789, -110.858671814, -110.858671479}},
{"f6bh910940", []float64{61.2654304504, 61.2654358149, -78.7052822113, -78.7052714825}},
{"r65q38x", []float64{-32.6486206055, -32.6472473145, 150.895843506, 150.897216797}},
{"xq2", []float64{35.15625, 36.5625, 146.25, 147.65625}},
{"q87xbntvdv8d", []float64{-42.1947657689, -42.1947656013, 117.429890111, 117.429890446}},
{"w1zhgmbpw", []float64{10.7115840912, 10.7116270065, 99.9868297577, 99.986872673}},
{"5n", []float64{-56.25, -50.625, -45.0, -33.75}},
{"9dz", []float64{15.46875, 16.875, -102.65625, -101.25}},
{"n8r794hh15gv", []float64{-87.9668216966, -87.966821529, 122.744798921, 122.744799256}},
{"px78re9", []float64{-49.1555786133, -49.1542053223, 162.752838135, 162.754211426}},
{"3pps", []float64{-4.921875, -4.74609375, -124.453125, -124.1015625}},
{"3s6um", []float64{-20.3466796875, -20.302734375, -108.413085938, -108.369140625}},
{"9dj7zre6t7", []float64{11.9508236647, 11.9508290291, -104.793895483, -104.793884754}},
{"4v1b", []float64{-61.875, -61.69921875, -53.7890625, -53.4375}},
{"1k35z", []float64{-65.4345703125, -65.390625, -122.036132812, -121.9921875}},
{"7z9n57", []float64{-1.74133300781, -1.73583984375, -9.70092773438, -9.68994140625}},
{"3gzg", []float64{-23.37890625, -23.203125, -90.3515625, -90.0}},
{"hy", []float64{-56.25, -50.625, 33.75, 45.0}},
{"2rj6t", []float64{-5.185546875, -5.1416015625, -161.147460938, -161.103515625}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"dp44nc1t", []float64{39.7329139709, 39.7330856323, -86.8888092041, -86.8884658813}},
{"0x1", []float64{-50.625, -49.21875, -156.09375, -154.6875}},
{"dmwxxf", []float64{32.2668457031, 32.2723388672, -69.2687988281, -69.2578125}},
{"khy29", []float64{-18.193359375, -18.1494140625, 8.8330078125, 8.876953125}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"phn4", []float64{-67.1484375, -66.97265625, 143.4375, 143.7890625}},
{"qzhvp", []float64{-4.74609375, -4.7021484375, 130.737304688, 130.78125}},
{"3n", []float64{-11.25, -5.625, -135.0, -123.75}},
{"0nx", []float64{-53.4375, -52.03125, -170.15625, -168.75}},
{"19uwx04h21", []float64{-79.0129369497, -79.0129315853, -105.86151123, -105.861500502}},
{"7ur1q", []float64{-20.8740234375, -20.830078125, -1.142578125, -1.0986328125}},
{"8yn6q9vmm", []float64{34.1560220718, 34.1560649872, -137.167868614, -137.167825699}},
{"m4zk", []float64{-28.828125, -28.65234375, 55.1953125, 55.546875}},
{"9bgzpypspd0", []float64{5.48287510872, 5.48287644982, -95.6253647804, -95.6253634393}},
{"y1s", []float64{53.4375, 54.84375, 95.625, 97.03125}},
{"qsyp207nvy", []float64{-17.0042717457, -17.0042663813, 120.941866636, 120.941877365}},
{"rfb", []float64{-29.53125, -28.125, 168.75, 170.15625}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"5exm5p63", []float64{-69.3935966492, -69.3934249878, -12.1697616577, -12.169418335}},
{"cnv22fdkruw", []float64{83.0271819234, 83.0271832645, -127.58079797, -127.580796629}},
{"n7vg", []float64{-68.37890625, -68.203125, 109.3359375, 109.6875}},
{"whvgd2h3sz9", []float64{27.3342821002, 27.3342834413, 98.1908561289, 98.19085747}},
{"shbfuzk8vr", []float64{27.2421401739, 27.2421455383, 1.2698328495, 1.26984357834}},
{"44vmk", []float64{-73.6083984375, -73.564453125, -82.44140625, -82.3974609375}},
{"uhd1mfq", []float64{70.5445861816, 70.5459594727, 3.07342529297, 3.07479858398}},
{"7bz", []float64{-40.78125, -39.375, -1.40625, 0.0}},
{"h5b2wkdqpz", []float64{-68.7925726175, -68.7925672531, 0.629643201828, 0.629653930664}},
{"h1", []float64{-84.375, -78.75, 0.0, 11.25}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"408bm1", []float64{-87.1380615234, -87.1325683594, -88.7255859375, -88.7145996094}},
{"ggysyy5e2be6", []float64{66.9622308388, 66.9622310065, -1.80790107697, -1.8079007417}},
{"w4u7dn8m9ndw", []float64{16.1206699535, 16.1206701212, 96.0648427159, 96.0648430511}},
{"yq", []float64{78.75, 84.375, 101.25, 112.5}},
{"2nwuht4w", []float64{-7.70587921143, -7.70570755005, -170.306625366, -170.306282043}},
{"v5gqe", []float64{67.236328125, 67.2802734375, 49.7021484375, 49.74609375}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"cmehghzjm1", []float64{76.7994600534, 76.7994654179, -119.389586449, -119.38957572}},
{"u207q361d", []float64{45.5784130096, 45.578455925, 11.8790531158, 11.8790960312}},
{"n4pgq345pp", []float64{-78.1726652384, -78.172659874, 101.176142693, 101.176153421}},
{"b2mn8", []float64{47.548828125, 47.5927734375, -161.71875, -161.674804688}},
{"qbe6mp0g8et3", []float64{-41.7529202811, -41.7529201135, 128.541097529, 128.541097865}},
{"sr04m", []float64{39.7705078125, 39.814453125, 11.4697265625, 11.513671875}},
{"hfr0y7u988jx", []float64{-77.1910560317, -77.1910558641, 43.8746168464, 43.8746171817}},
{"jrgxg5qkg7p", []float64{-45.0252610445, -45.0252597034, 61.3124428689, 61.3124442101}},
{"gryut", []float64{89.384765625, 89.4287109375, -24.0380859375, -23.994140625}},
{"14d5b766ppxg", []float64{-75.2600834705, -75.2600833029, -132.173112966, -132.173112631}},
{"0ede2rgwt2", []float64{-69.6975231171, -69.6975177526, -153.968356848, -153.968346119}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"dj", []float64{28.125, 33.75, -90.0, -78.75}},
{"xf", []float64{11.25, 16.875, 168.75, 180.0}},
{"szf3p5k9rmx", []float64{43.7876281142, 43.7876294553, 37.228180021, 37.2281813622}},
{"b9fqkhfem7", []float64{55.9690493345, 55.9690546989, -154.156497717, -154.156486988}},
{"t7zw8x5c", []float64{22.2749519348, 22.2751235962, 66.8239974976, 66.8243408203}},
{"f87dmh", []float64{46.8237304688, 46.8292236328, -62.3583984375, -62.3474121094}},
{"yrd1swq12", []float64{87.4857187271, 87.4857616425, 104.268493652, 104.268536568}},
{"s2", []float64{0.0, 5.625, 11.25, 22.5}},
{"q9dhkgwy4kum", []float64{-35.7951473258, -35.7951471582, 115.530612208, 115.530612543}},
{"7dr", []float64{-32.34375, -30.9375, -12.65625, -11.25}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"2rpk2ey43dw", []float64{-4.85693067312, -4.85692933202, -158.524402678, -158.524401337}},
{"3wfeg", []float64{-6.3720703125, -6.328125, -108.852539062, -108.80859375}},
{"ke5k4j", []float64{-27.3944091797, -27.3889160156, 27.158203125, 27.1691894531}},
{"z0xq", []float64{48.8671875, 49.04296875, 145.1953125, 145.546875}},
{"w1sy", []float64{9.4921875, 9.66796875, 96.6796875, 97.03125}},
{"eqm14", []float64{35.33203125, 35.3759765625, -26.630859375, -26.5869140625}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"hjp9d9f", []float64{-61.6017150879, -61.6003417969, 10.6594848633, 10.6608581543}},
{"p92v0", []float64{-82.08984375, -82.0458984375, 158.5546875, 158.598632812}},
{"36m7g02m", []float64{-31.6823387146, -31.6821670532, -116.23500824, -116.234664917}},
{"5g70e57zjrf", []float64{-71.6117633879, -71.6117620468, -6.89403623343, -6.89403489232}},
{"65rkyfq4se", []float64{-25.8709841967, -25.8709788322, -79.4996237755, -79.4996130466}},
{"eev1", []float64{21.26953125, 21.4453125, -15.46875, -15.1171875}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"m75926t", []float64{-27.8915405273, -27.8901672363, 61.1897277832, 61.1911010742}},
{"1kjyeb", []float64{-66.357421875, -66.3519287109, -115.499267578, -115.48828125}},
{"fb8rk2yfwmrp", []float64{49.0914924257, 49.0914925933, -55.7021225989, -55.7021222636}},
{"y2qhd0j8x", []float64{47.1973514557, 47.197394371, 109.783244133, 109.783287048}},
{"m2", []float64{-45.0, -39.375, 56.25, 67.5}},
{"0543np5pgd23", []float64{-72.9094239883, -72.9094238207, -176.567995213, -176.567994878}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"d4h5zdhe5gy", []float64{11.9207011163, 11.9207024574, -84.0390613675, -84.0390600264}},
{"9rcd", []float64{43.9453125, 44.12109375, -121.640625, -121.2890625}},
{"ne9nrh75tq3", []float64{-69.1898868978, -69.1898855567, 114.218213707, 114.218215048}},
{"7wk7", []float64{-9.31640625, -9.140625, -16.5234375, -16.171875}},
{"995f97e", []float64{6.08367919922, 6.08505249023, -107.167510986, -107.166137695}},
{"60kmung", []float64{-42.5459289551, -42.5445556641, -83.843536377, -83.8421630859}},
{"845", []float64{11.25, 12.65625, -175.78125, -174.375}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"jehdxn0", []float64{-72.6525878906, -72.6512145996, 74.1357421875, 74.1371154785}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"1d", []float64{-78.75, -73.125, -112.5, -101.25}},
{"rbjy", []float64{-43.9453125, -43.76953125, 176.8359375, 177.1875}},
{"r8qgzf4r9uy", []float64{-42.9222710431, -42.922269702, 167.335936725, 167.335938066}},
{"k5p", []float64{-28.125, -26.71875, 9.84375, 11.25}},
{"f4z7", []float64{60.99609375, 61.171875, -79.8046875, -79.453125}},
{"7rp35b", []float64{-5.44921875, -5.44372558594, -23.3898925781, -23.37890625}},
{"zn71yyn0pbc", []float64{80.4968301952, 80.4968315363, 139.52395454, 139.523955882}},
{"ppj7", []float64{-50.09765625, -49.921875, 142.3828125, 142.734375}},
{"mqv3q", []float64{-6.8115234375, -6.767578125, 63.896484375, 63.9404296875}},
{"tsdtmfq", []float64{26.2477111816, 26.2490844727, 71.276550293, 71.277923584}},
{"72ey8b14uynx", []float64{-41.0444164462, -41.0444162786, -28.4420176595, -28.4420173243}},
{"7qrgb", []float64{-9.1845703125, -9.140625, -22.8515625, -22.8076171875}},
{"w7zmkdpezcm", []float64{22.0282383263, 22.0282396674, 111.653705388, 111.653706729}},
{"kqwr1dh9jdbc", []float64{-7.19585834071, -7.19585817307, 20.1113973185, 20.1113976538}},
{"kv9jx", []float64{-13.095703125, -13.0517578125, 35.4638671875, 35.5078125}},
{"09", []float64{-84.375, -78.75, -157.5, -146.25}},
{"f8ztmmp0", []float64{50.1690673828, 50.1692390442, -56.7127990723, -56.7124557495}},
{"k5dj8cuwbxjg", []float64{-24.3348933198, -24.3348931521, 2.85166796297, 2.85166829824}},
{"xd72j5qndwhn", []float64{12.6752517745, 12.6752519421, 162.298391461, 162.298391797}},
{"esp42d", []float64{22.9064941406, 22.9119873047, -12.6342773438, -12.6232910156}},
{"5sbfys", []float64{-62.7758789062, -62.7703857422, -21.1596679688, -21.1486816406}},
{"8wsz02n", []float64{37.79296875, 37.794342041, -150.801086426, -150.799713135}},
{"zeghw8", []float64{66.884765625, 66.8902587891, 162.004394531, 162.015380859}},
{"u0xg7ug", []float64{48.4098815918, 48.4112548828, 11.0673522949, 11.0687255859}},
{"0jb11", []float64{-57.48046875, -57.4365234375, -179.956054688, -179.912109375}},
{"xv8cwtybm", []float64{31.2328004837, 31.232843399, 170.099816322, 170.099859238}},
{"ef0cwqt7", []float64{11.5498924255, 11.5500640869, -9.91344451904, -9.91310119629}},
{"hrh5k", []float64{-50.0537109375, -50.009765625, 17.05078125, 17.0947265625}},
{"pnpdsx4eb", []float64{-55.7714509964, -55.7714080811, 145.748062134, 145.748105049}},
{"8g2sx4gn", []float64{19.0884017944, 19.0885734558, -145.235137939, -145.234794617}},
{"tsue3yr4z", []float64{27.3248434067, 27.324886322, 73.9149427414, 73.9149856567}},
{"k4vq", []float64{-28.4765625, -28.30078125, 7.3828125, 7.734375}},
{"mr1f1d430h", []float64{-5.26225805283, -5.26225268841, 58.7799453735, 58.7799561024}},
{"dtuqkjybm", []float64{33.4740114212, 33.4740543365, -61.3381719589, -61.3381290436}},
{"p00zpfbj5350", []float64{-88.7535613775, -88.7535612099, 136.39540717, 136.395407505}},
{"n16jy8wrg38", []float64{-81.9539228082, -81.9539214671, 93.106867075, 93.1068684161}},
{"3ckf9t6", []float64{-37.5004577637, -37.4990844727, -94.5016479492, -94.5002746582}},
{"vvch78h7q7", []float64{78.0913943052, 78.0913996696, 80.3161633015, 80.3161740303}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"qkr8dj44", []float64{-20.9780502319, -20.9778785706, 111.887512207, 111.88785553}},
{"s5dw7s", []float64{20.8081054688, 20.8135986328, 3.66943359375, 3.68041992188}},
{"tpt", []float64{42.1875, 43.59375, 52.03125, 53.4375}},
{"6vqn07ep", []float64{-14.3936347961, -14.3934631348, -47.7973937988, -47.7970504761}},
{"7zbup2", []float64{-0.703125, -0.697631835938, -9.87670898438, -9.86572265625}},
{"xd0j0wrn39f8", []float64{12.1643207967, 12.1643209644, 157.531653419, 157.531653754}},
{"254kywz4", []float64{-27.2526168823, -27.2524452209, -176.540679932, -176.540336609}},
{"6pkmr1875rp", []float64{-3.28710615635, -3.28710481524, -83.7153281271, -83.715326786}},
{"69bmhmbw0de", []float64{-34.2447146773, -34.2447133362, -66.9609577954, -66.9609564543}},
{"47jd", []float64{-72.7734375, -72.59765625, -71.015625, -70.6640625}},
{"mw3ngtnj", []float64{-8.6289024353, -8.62873077393, 69.0682983398, 69.0686416626}},
{"v", []float64{45.0, 90.0, 45.0, 90.0}},
{"4uyq1", []float64{-62.2265625, -62.1826171875, -47.4169921875, -47.373046875}},
{"9748v3e", []float64{17.0150756836, 17.0164489746, -119.999542236, -119.998168945}},
{"sjy7", []float64{32.87109375, 33.046875, 8.7890625, 9.140625}},
{"nc1jb2kb", []float64{-83.3628845215, -83.3627128601, 125.17375946, 125.174102783}},
{"ffryw", []float64{58.798828125, 58.8427734375, -45.087890625, -45.0439453125}},
{"3qfr7scg5s", []float64{-5.7302069664, -5.73020160198, -120.429575443, -120.429564714}},
{"1x", []float64{-50.625, -45.0, -112.5, -101.25}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"n4wk", []float64{-75.234375, -75.05859375, 98.7890625, 99.140625}},
{"bw2d2", []float64{80.5517578125, 80.595703125, -156.796875, -156.752929688}},
{"ztgehr9mpt", []float64{77.9131776094, 77.9131829739, 162.610681057, 162.610691786}},
{"bnkb", []float64{80.15625, 80.33203125, -173.3203125, -172.96875}},
{"q0fmcn", []float64{-39.7375488281, -39.7320556641, 93.2080078125, 93.2189941406}},
{"e0e1sxt9gwm", []float64{3.11770454049, 3.1177058816, -40.5757860839, -40.5757847428}},
{"9qc", []float64{37.96875, 39.375, -122.34375, -120.9375}},
{"0cybm9snr", []float64{-80.1029920578, -80.1029491425, -136.51031971, -136.510276794}},
{"fp", []float64{84.375, 90.0, -90.0, -78.75}},
{"7u69k7", []float64{-20.8575439453, -20.8520507812, -7.54760742188, -7.53662109375}},
{"guh3mbvnwv7y", []float64{67.7249914035, 67.7249915712, -5.01359079033, -5.01359045506}},
{"vgw4wgnrd58e", []float64{65.1447393559, 65.1447395235, 87.4928004295, 87.4928007647}},
{"rzk732w", []float64{-3.64471435547, -3.64334106445, 174.789733887, 174.791107178}},
{"kf", []float64{-33.75, -28.125, 33.75, 45.0}},
{"rcfr28t0", []float64{-33.8790893555, -33.8789176941, 171.942901611, 171.943244934}},
{"5bqnms", []float64{-87.4731445312, -87.4676513672, -2.57080078125, -2.55981445312}},
{"fs84w", []float64{70.751953125, 70.7958984375, -67.236328125, -67.1923828125}},
{"mcjrsmx", []float64{-38.0264282227, -38.0250549316, 86.3291931152, 86.3305664062}},
{"u84", []float64{45.0, 46.40625, 25.3125, 26.71875}},
{"gkv4g14m", []float64{72.2084999084, 72.2086715698, -26.5838241577, -26.583480835}},
{"27dhxu", []float64{-24.4995117188, -24.4940185547, -165.596923828, -165.5859375}},
{"0v", []float64{-61.875, -56.25, -146.25, -135.0}},
{"bpurn", []float64{89.82421875, 89.8681640625, -173.759765625, -173.715820312}},
{"p5", []float64{-73.125, -67.5, 135.0, 146.25}},
{"f3ffsuh", []float64{55.3051757812, 55.3065490723, -74.6685791016, -74.6672058105}},
{"j0zbr0tb", []float64{-85.7345581055, -85.7343864441, 56.2139511108, 56.2142944336}},
{"vyz", []float64{82.96875, 84.375, 88.59375, 90.0}},
{"082p96b5ey", []float64{-87.2596514225, -87.2596460581, -157.444907427, -157.444896698}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"g030qs", []float64{46.4721679688, 46.4776611328, -43.3081054688, -43.2971191406}},
{"54", []float64{-78.75, -73.125, -45.0, -33.75}},
{"fp5rcptn2gc", []float64{85.7795964181, 85.7795977592, -85.3788422048, -85.3788408637}},
{"dk8z85s6516h", []float64{26.650436148, 26.6504363157, -77.6893445849, -77.6893442497}},
{"3v1ebh18qujh", []float64{-16.1937826127, -16.193782445, -99.1382686794, -99.1382683441}},
{"un50j3xf9", []float64{78.7586688995, 78.7587118149, 4.46014881134, 4.46019172668}},
{"4b8y3gh", []float64{-86.0723876953, -86.0710144043, -55.1129150391, -55.111541748}},
{"efdgh", []float64{14.58984375, 14.6337890625, -7.20703125, -7.1630859375}},
{"1xxuk2", []float64{-47.0654296875, -47.0599365234, -101.414794922, -101.403808594}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"1z6", []float64{-49.21875, -47.8125, -98.4375, -97.03125}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"mxkjr1jdu5ru", []float64{-3.28991509974, -3.2899149321, 73.440352343, 73.4403526783}},
{"x", []float64{0.0, 45.0, 135.0, 180.0}},
{"3kpcn3sm", []float64{-22.315120697, -22.3149490356, -112.57106781, -112.570724487}},
{"buk3t0ctrmt0", []float64{69.1749724746, 69.1749726422, -140.051333159, -140.051332824}},
{"pp", []float64{-50.625, -45.0, 135.0, 146.25}},
{"4h", []float64{-67.5, -61.875, -90.0, -78.75}},
{"fjw1kcg4", []float64{76.1671829224, 76.1673545837, -81.3496398926, -81.3492965698}},
{"877wsvjfz5", []float64{19.4517821074, 19.4517874718, -163.611187935, -163.611177206}},
{"ru3", []float64{-21.09375, -19.6875, 170.15625, 171.5625}},
{"yr", []float64{84.375, 90.0, 101.25, 112.5}},
{"cu5x6cxq", []float64{68.7836837769, 68.7838554382, -96.1973190308, -96.196975708}},
{"w04vuf4bdzjm", []float64{1.02185273543, 1.02185290307, 94.0798293427, 94.0798296779}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"4zdcmmp2p4", []float64{-47.5652968884, -47.5652915239, -52.1418428421, -52.1418321133}},
{"eft02s1hu", []float64{14.1292333603, 14.1292762756, -4.19523239136, -4.19518947601}},
{"zk4v9qdeg5q1", []float64{68.5031637736, 68.5031639412, 150.175689161, 150.175689496}},
{"8xr", []float64{40.78125, 42.1875, -147.65625, -146.25}},
{"3pxyrt", []float64{-1.68640136719, -1.68090820312, -123.771972656, -123.760986328}},
{"cmh39xszs8", []float64{73.4311580658, 73.4311634302, -117.70080328, -117.700792551}},
{"xrm9d0wb48", []float64{41.047668457, 41.0476738214, 154.081642628, 154.081653357}},
{"d4bh0k", []float64{16.1938476562, 16.1993408203, -89.9890136719, -89.9780273438}},
{"hk8", []float64{-64.6875, -63.28125, 11.25, 12.65625}},
{"9hxqk54m0wn", []float64{26.4285027981, 26.4285041392, -124.625786841, -124.6257855}},
{"mygnv0", []float64{-5.8447265625, -5.83923339844, 83.1884765625, 83.1994628906}},
{"yrjmvs", []float64{85.4077148438, 85.4132080078, 108.874511719, 108.885498047}},
{"52csyemvf12", []float64{-84.9274425209, -84.9274411798, -31.3469982147, -31.3469968736}},
{"4jrvjj", []float64{-59.5623779297, -59.5568847656, -78.8818359375, -78.8708496094}},
{"ys1", []float64{67.5, 68.90625, 113.90625, 115.3125}},
{"unf91", []float64{83.14453125, 83.1884765625, 3.5595703125, 3.603515625}},
{"h5che0vnt", []float64{-68.109998703, -68.1099557877, 1.5451669693, 1.54520988464}},
{"ugrk3", []float64{64.0283203125, 64.072265625, 43.9892578125, 44.033203125}},
{"9ush8c", []float64{26.1090087891, 26.1145019531, -95.5920410156, -95.5810546875}},
{"q92pzb", []float64{-36.6064453125, -36.6009521484, 112.840576172, 112.8515625}},
{"0e", []float64{-73.125, -67.5, -157.5, -146.25}},
{"dbt1mchu", []float64{3.03840637207, 3.03857803345, -48.9595413208, -48.959197998}},
{"98xv2m", []float64{3.76281738281, 3.76831054688, -101.590576172, -101.579589844}},
{"rqd8u195kgu", []float64{-8.29684630036, -8.29684495926, 149.942988753, 149.942990094}},
{"504wk58ccv", []float64{-88.8818138838, -88.8818085194, -41.3074886799, -41.307477951}},
{"0dzjhbn", []float64{-73.65234375, -73.650970459, -147.43927002, -147.437896729}},
{"sgcn", []float64{22.1484375, 22.32421875, 35.15625, 35.5078125}},
{"46k78jw0x65w", []float64{-76.6982056573, -76.6982054897, -72.7648819238, -72.7648815885}},
{"6w2cbxx3nf9", []float64{-9.49474900961, -9.4947476685, -66.4130924642, -66.4130911231}},
{"zxmf4", []float64{86.1328125, 86.1767578125, 165.673828125, 165.717773438}},
{"unf", []float64{82.96875, 84.375, 2.8125, 4.21875}},
{"m4p", []float64{-33.75, -32.34375, 54.84375, 56.25}},
{"dsc1rqss2w", []float64{26.9749438763, 26.9749492407, -65.7689452171, -65.7689344883}},
{"cxp", []float64{84.375, 85.78125, -102.65625, -101.25}},
{"zmh", []float64{73.125, 74.53125, 151.875, 153.28125}},
{"tynvnjc8hdb", []float64{34.6605066955, 34.6605080366, 88.5081124306, 88.5081137717}},
{"uk8hb", []float64{71.1474609375, 71.19140625, 11.25, 11.2939453125}},
{"34d", []float64{-30.9375, -29.53125, -132.1875, -130.78125}},
{"ts39vet4rzw5", []float64{24.2335202359, 24.2335204035, 69.8582813144, 69.8582816496}},
{"3rt1fx5", []float64{-2.46643066406, -2.46505737305, -116.604766846, -116.603393555}},
{"ujn8yhfpg", []float64{73.2842588425, 73.2843017578, 9.40717220306, 9.40721511841}},
{"pdbvhzj", []float64{-73.6138916016, -73.6125183105, 158.770294189, 158.77166748}},
{"q35", []float64{-39.375, -37.96875, 105.46875, 106.875}},
{"szh5424hc", []float64{39.9031591415, 39.9032020569, 39.4766664505, 39.4767093658}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"tt1wjkr44e", []float64{29.2033928633, 29.2033982277, 69.8498082161, 69.8498189449}},
{"1u3hdkn", []float64{-65.2807617188, -65.2793884277, -99.7366333008, -99.7352600098}},
{"jc9", []float64{-81.5625, -80.15625, 80.15625, 81.5625}},
{"627pp", []float64{-42.36328125, -42.3193359375, -74.2236328125, -74.1796875}},
{"g46wqb4z", []float64{58.7560844421, 58.7562561035, -41.1839675903, -41.1836242676}},
{"2407674", []float64{-33.1622314453, -33.1608581543, -179.546813965, -179.545440674}},
{"3vbsrcxu", []float64{-11.9002532959, -11.9000816345, -100.195655823, -100.1953125}},
{"u0mr9fpy", []float64{47.7366256714, 47.7367973328, 7.47035980225, 7.470703125}},
{"p1s1", []float64{-81.38671875, -81.2109375, 140.625, 140.9765625}},
{"ce7y6s1ugjpu", []float64{64.4026983529, 64.4026985206, -107.11415682, -107.114156485}},
{"tujn", []float64{23.5546875, 23.73046875, 85.78125, 86.1328125}},
{"fes", []float64{64.6875, 66.09375, -61.875, -60.46875}},
{"28te871t29y", []float64{-41.5548755229, -41.5548741817, -149.752549231, -149.75254789}},
{"2z9j0591", []float64{-1.9141960144, -1.91402435303, -144.842376709, -144.842033386}},
{"e", []float64{0.0, 45.0, -45.0, 0.0}},
{"90", []float64{0.0, 5.625, -135.0, -123.75}},
{"jbfm12r", []float64{-84.900970459, -84.899597168, 81.9786071777, 81.9799804688}},
{"y0ws", []float64{48.515625, 48.69140625, 99.140625, 99.4921875}},
{"m2", []float64{-45.0, -39.375, 56.25, 67.5}},
{"gpspv95sz", []float64{88.5561132431, 88.5561561584, -39.1281938553, -39.1281509399}},
{"7k8u95cyjdx6", []float64{-18.8748412952, -18.8748411275, -32.6487181708, -32.6487178355}},
{"c1fe0r", []float64{55.4095458984, 55.4150390625, -131.473388672, -131.462402344}},
{"668wjecj2d", []float64{-29.8613011837, -29.8612958193, -77.8037810326, -77.8037703037}},
{"dnq3", []float64{35.33203125, 35.5078125, -81.2109375, -80.859375}},
{"m3sxdxnvmrr", []float64{-35.2047483623, -35.2047470212, 62.6974926889, 62.69749403}},
{"zz3qpfvqzu", []float64{86.8522238731, 86.8522292376, 170.855931044, 170.855941772}},
{"98mjjx8bu", []float64{2.3264837265, 2.32652664185, -105.225849152, -105.225806236}},
{"pkmusy0e4j35", []float64{-65.2692317404, -65.2692315727, 154.545451552, 154.545451887}},
{"j3f9dtm5r5n", []float64{-79.8631650209, -79.8631636798, 59.8826631904, 59.8826645315}},
{"67up3c0uh9jn", []float64{-22.6256497577, -22.62564959, -73.0468659103, -73.046865575}},
{"6q0fd9wn2", []float64{-10.8012342453, -10.80119133, -77.5772094727, -77.5771665573}},
{"t82e5zrs", []float64{1.97410583496, 1.97427749634, 68.3782196045, 68.3785629272}},
{"0hstxh", []float64{-63.6987304688, -63.6932373047, -173.364257812, -173.353271484}},
{"qe1egcuetqe", []float64{-27.4555715919, -27.4555702507, 114.78057906, 114.780580401}},
{"yhp25wc4v", []float64{67.5375509262, 67.5375938416, 100.350708961, 100.350751877}},
{"z6uvby2nrt4k", []float64{61.5149248391, 61.5149250068, 152.962971367, 152.962971702}},
{"29sd0863cx", []float64{-36.2092262506, -36.2092208862, -151.146748066, -151.146737337}},
{"kvnx614", []float64{-15.5950927734, -15.5937194824, 42.981262207, 42.982635498}},
{"mu1srk07", []float64{-21.7304420471, -21.7302703857, 81.1783218384, 81.1786651611}},
{"5bz5bmq", []float64{-85.0932312012, -85.0918579102, -1.38702392578, -1.38565063477}},
{"fu4yx9fr8gtk", []float64{68.6534980685, 68.6534982361, -52.0500935242, -52.0500931889}},
{"3hyhj92rn", []float64{-17.5700569153, -17.5700139999, -126.320199966, -126.320157051}},
{"345nw", []float64{-32.607421875, -32.5634765625, -130.517578125, -130.473632812}},
{"q5f2p327mhy", []float64{-23.8988001645, -23.8987988234, 93.4832319617, 93.4832333028}},
{"0wmufb9", []float64{-54.0060424805, -54.0046691895, -149.2918396, -149.290466309}},
{"r", []float64{-45.0, 0.0, 135.0, 180.0}},
{"07d2sde", []float64{-70.2108764648, -70.2095031738, -165.384063721, -165.38269043}},
{"d0r2", []float64{1.40625, 1.58203125, -79.8046875, -79.453125}},
{"znegsexfs23h", []float64{82.1973916143, 82.197391782, 140.482018143, 140.482018478}},
{"sfr69qxg", []float64{13.1319236755, 13.1320953369, 44.010887146, 44.0112304688}},
{"tr44b8brc", []float64{39.8638486862, 39.8638916016, 59.0848588943, 59.0849018097}},
{"tbnqctecsf", []float64{1.21700406075, 1.21700942516, 87.6103341579, 87.6103448868}},
{"jpfy538qu", []float64{-45.3421640396, -45.3421211243, 49.0105247498, 49.0105676651}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"gskrg0z5e", []float64{70.2732753754, 70.2733182907, -16.3818597794, -16.381816864}},
{"6cz", []float64{-35.15625, -33.75, -46.40625, -45.0}},
{"u67hm7b47423", []float64{58.4243181534, 58.424318321, 15.6995919719, 15.6995923072}},
{"j154zhnnkyt3", []float64{-83.8685209863, -83.8685208187, 49.5348178223, 49.5348181576}},
{"muqdpev4smv", []float64{-20.7211281359, -20.7211267948, 88.2272703946, 88.2272717357}},
{"47h3upynmsru", []float64{-72.7737144381, -72.7737142704, -72.589170076, -72.5891697407}},
{"g6j200", []float64{56.25, 56.2554931641, -26.3671875, -26.3562011719}},
{"tw", []float64{33.75, 39.375, 67.5, 78.75}},
{"c0pjhr520q", []float64{45.9173905849, 45.9173959494, -124.965008497, -124.964997768}},
{"8nx", []float64{36.5625, 37.96875, -170.15625, -168.75}},
{"47b2wvtns", []float64{-68.7870311737, -68.7869882584, -78.0947685242, -78.0947256088}},
{"vrsbq", []float64{87.2314453125, 87.275390625, 63.193359375, 63.2373046875}},
{"sz", []float64{39.375, 45.0, 33.75, 45.0}},
{"xe61b0bnw", []float64{18.5941028595, 18.5941457748, 160.312757492, 160.312800407}},
{"dky6qedz3w", []float64{27.1347606182, 27.1347659826, -69.6714520454, -69.6714413166}},
{"vmvkqx2hb", []float64{78.1314611435, 78.1315040588, 63.9184570312, 63.9184999466}},
{"t96m49xgr1y", []float64{7.9189632833, 7.9189646244, 70.7848772407, 70.7848785818}},
{"brw2urrqcfex", []float64{87.3603346758, 87.3603348434, -159.764133766, -159.764133431}},
{"z7m8", []float64{63.28125, 63.45703125, 153.984375, 154.3359375}},
{"wm6w7f38", []float64{30.6422424316, 30.642414093, 104.932479858, 104.932823181}},
{"rxj23rtt4y", []float64{-5.53896546364, -5.53896009922, 164.945415258, 164.945425987}},
{"sfr9xsyzfn", []float64{12.9473769665, 12.9473823309, 44.6358203888, 44.6358311176}},
{"9ubf9uq02e", []float64{27.1816080809, 27.1816134453, -100.110146999, -100.110136271}},
{"kj25zp1gb6j", []float64{-14.7704637051, -14.770462364, 0.310037881136, 0.31003922224}},
{"x4f", []float64{15.46875, 16.875, 137.8125, 139.21875}},
{"xnn27kkf4c", []float64{33.8176399469, 33.8176453114, 143.938525915, 143.938536644}},
{"61bhs9byn4", []float64{-34.3545806408, -34.3545752764, -89.8009586334, -89.8009479046}},
{"rv2sve92mngr", []float64{-14.6144826896, -14.614482522, 169.696759768, 169.696760103}},
{"zkvq2w", []float64{72.8503417969, 72.8558349609, 153.654785156, 153.665771484}},
{"qprmp68h7kcd", []float64{-3.32535546273, -3.32535529509, 100.514057502, 100.514057837}},
{"77pmzubu", []float64{-27.0874786377, -27.0873069763, -23.2130813599, -23.2127380371}},
{"q73t2sumh3b", []float64{-25.7689382136, -25.7689368725, 103.387366533, 103.387367874}},
{"3kxch9c", []float64{-19.5021057129, -19.5007324219, -112.652435303, -112.651062012}},
{"t", []float64{0.0, 45.0, 45.0, 90.0}},
{"3um1y618chw", []float64{-20.7749935985, -20.7749922574, -93.9419808984, -93.9419795573}},
{"45nj7sxww", []float64{-72.1763134003, -72.1762704849, -81.3981342316, -81.3980913162}},
{"rnkyjdv404", []float64{-8.77360224724, -8.77359688282, 141.928253174, 141.928263903}},
{"p3", []float64{-84.375, -78.75, 146.25, 157.5}},
{"sxbz", []float64{44.82421875, 45.0, 23.5546875, 23.90625}},
{"xuj2k", []float64{22.5439453125, 22.587890625, 176.30859375, 176.352539062}},
{"yhp9", []float64{67.67578125, 67.8515625, 100.546875, 100.8984375}},
{"1yq4", []float64{-54.4921875, -54.31640625, -92.8125, -92.4609375}},
{"u4m2jkw", []float64{57.6809692383, 57.6823425293, 7.62176513672, 7.62313842773}},
{"xb9", []float64{2.8125, 4.21875, 170.15625, 171.5625}},
{"ebf4e478jp", []float64{4.67060029507, 4.67060565948, -8.30064296722, -8.30063223839}},
{"y7venx9", []float64{66.6622924805, 66.6636657715, 109.271392822, 109.272766113}},
{"8qu", []float64{37.96875, 39.375, -163.125, -161.71875}},
{"jw2jbzms66", []float64{-53.7924420834, -53.7924367189, 67.5406086445, 67.5406193733}},
{"n", []float64{-90.0, -45.0, 90.0, 135.0}},
{"jbx", []float64{-87.1875, -85.78125, 88.59375, 90.0}},
{"3v4n", []float64{-15.8203125, -15.64453125, -98.4375, -98.0859375}},
{"0z1theg", []float64{-49.7254943848, -49.7241210938, -143.938751221, -143.93737793}},
{"zbz00jf21m", []float64{49.2503625154, 49.2503678799, 178.596893549, 178.596904278}},
{"dfpq2eg2", []float64{12.3692321777, 12.3694038391, -46.0282516479, -46.0279083252}},
{"z2j5bc1ph562", []float64{45.6658919156, 45.6658920832, 153.315756954, 153.31575729}},
{"3p3g", []float64{-3.69140625, -3.515625, -132.5390625, -132.1875}},
{"4rfgeu3", []float64{-45.7676696777, -45.7662963867, -74.7166442871, -74.7152709961}},
{"nykq", []float64{-53.7890625, -53.61328125, 129.7265625, 130.078125}},
{"h", []float64{-90.0, -45.0, 0.0, 45.0}},
{"85", []float64{16.875, 22.5, -180.0, -168.75}},
{"bdsdxr", []float64{59.5404052734, 59.5458984375, -150.853271484, -150.842285156}},
{"wsyt3duqg2", []float64{27.657866478, 27.6578718424, 121.71251893, 121.712529659}},
{"90", []float64{0.0, 5.625, -135.0, -123.75}},
{"butw", []float64{71.3671875, 71.54296875, -138.515625, -138.1640625}},
{"ddhpjv6b7tqh", []float64{12.5093796104, 12.5093797781, -61.6183796525, -61.6183793172}},
{"18ueqgd", []float64{-85.1907348633, -85.1893615723, -105.872497559, -105.871124268}},
{"v2g8jh1", []float64{49.2407226562, 49.2420959473, 61.3929748535, 61.3943481445}},
{"84umeh3gmupk", []float64{16.45947285, 16.4594730176, -173.888941817, -173.888941482}},
{"s4g900", []float64{15.64453125, 15.6500244141, 4.921875, 4.93286132812}},
{"0b313fz2", []float64{-88.3589172363, -88.358745575, -144.756889343, -144.756546021}},
{"4q", []float64{-56.25, -50.625, -78.75, -67.5}},
{"d61", []float64{11.25, 12.65625, -77.34375, -75.9375}},
{"w5q5298pq", []float64{18.8620233536, 18.8620662689, 98.4597301483, 98.4597730637}},
{"ushgx399", []float64{68.1236457825, 68.1238174438, 29.5003509521, 29.5006942749}},
{"73ngt", []float64{-38.759765625, -38.7158203125, -24.0380859375, -23.994140625}},
{"2f4smcem", []float64{-32.9938316345, -32.9936599731, -142.477226257, -142.476882935}},
{"0", []float64{-90.0, -45.0, -180.0, -135.0}},
{"8", []float64{0.0, 45.0, -180.0, -135.0}},
{"5u14weqgz", []float64{-67.0420503616, -67.0420074463, -9.54853534698, -9.54849243164}},
{"xxhuu8y4xb", []float64{40.214509964, 40.2145153284, 164.386013746, 164.386024475}},
{"272xeqmj", []float64{-25.3652000427, -25.3650283813, -167.897186279, -167.896842957}},
{"2trrhunrd1", []float64{-14.215015769, -14.2150104046, -147.087278366, -147.087267637}},
{"e4", []float64{11.25, 16.875, -45.0, -33.75}},
{"p5duz8tp", []float64{-69.4735908508, -69.4734191895, 139.203643799, 139.203987122}},
{"5qprz78e", []float64{-54.8679542542, -54.8677825928, -23.2353973389, -23.2350540161}},
{"ch5yq40qtu3", []float64{68.6107577384, 68.6107590795, -129.462299198, -129.462297857}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"7qmv9c5", []float64{-8.87145996094, -8.87008666992, -25.5830383301, -25.5816650391}},
{"c", []float64{45.0, 90.0, -135.0, -90.0}},
{"hp8s7", []float64{-47.0654296875, -47.021484375, 0.8349609375, 0.87890625}},
{"9e4y04d17", []float64{17.9436349869, 17.9436779022, -108.629937172, -108.629894257}},
{"39nh", []float64{-38.671875, -38.49609375, -104.0625, -103.7109375}},
{"6", []float64{-45.0, 0.0, -90.0, -45.0}},
{"pjpxe1pvyzhm", []float64{-60.5501220189, -60.5501218513, 145.689649321, 145.689649656}},
{"drx", []float64{42.1875, 43.59375, -68.90625, -67.5}},
{"zu1c5qg0s", []float64{67.7129459381, 67.7129888535, 171.3580513, 171.358094215}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"t8d", []float64{2.8125, 4.21875, 70.3125, 71.71875}},
{"d47w70rwe23h", []float64{13.7573739141, 13.7573740818, -84.9358485639, -84.9358482286}},
{"3t617", []float64{-15.2490234375, -15.205078125, -109.555664062, -109.51171875}},
{"qnkq1pz", []float64{-8.74649047852, -8.7451171875, 96.0301208496, 96.0314941406}},
{"fu", []float64{67.5, 73.125, -56.25, -45.0}},
{"7vs", []float64{-14.0625, -12.65625, -5.625, -4.21875}},
{"bztqz0h", []float64{88.3740234375, 88.3753967285, -138.554077148, -138.552703857}},
{"b8j", []float64{45.0, 46.40625, -150.46875, -149.0625}},
{"cetkxmq73", []float64{65.5079126358, 65.5079555511, -104.789958, -104.789915085}},
{"p91", []float64{-84.375, -82.96875, 158.90625, 160.3125}},
{"z4g7bn4w38", []float64{61.1619615555, 61.1619669199, 139.573810101, 139.573820829}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"g2eej64jbwj", []float64{48.3518493176, 48.3518506587, -28.5946373641, -28.594636023}},
{"fwshzb30mj", []float64{82.398903966, 82.3989093304, -61.5328359604, -61.5328252316}},
{"fv2mqt", []float64{75.4815673828, 75.4870605469, -55.6127929688, -55.6018066406}},
{"bzr6m3zdun5", []float64{86.1868751049, 86.186876446, -135.813499242, -135.813497901}},
{"et5rq77j8b", []float64{29.4182109833, 29.4182163477, -17.6508772373, -17.6508665085}},
{"1c", []float64{-84.375, -78.75, -101.25, -90.0}},
{"y1hyumh10jq", []float64{51.8391890824, 51.8391904235, 96.8719562888, 96.8719576299}},
{"qd42djnqxq", []float64{-33.6334955692, -33.6334902048, 115.76084733, 115.760858059}},
{"hsd9s", []float64{-64.423828125, -64.3798828125, 26.19140625, 26.2353515625}},
{"8289gq947", []float64{3.156208992, 3.15625190735, -167.902550697, -167.902507782}},
{"em37sw72zq4", []float64{30.1809775829, 30.180978924, -31.7896565795, -31.7896552384}},
{"zms25", []float64{75.9375, 75.9814453125, 152.358398438, 152.40234375}},
{"h25d54", []float64{-89.6374511719, -89.6319580078, 16.3037109375, 16.3146972656}},
{"6qc7y2t4bb", []float64{-6.36885166168, -6.36884629726, -76.7106306553, -76.7106199265}},
{"06vt5z8j", []float64{-73.6102867126, -73.6101150513, -160.850830078, -160.850486755}},
{"37q3", []float64{-26.54296875, -26.3671875, -114.9609375, -114.609375}},
{"sey9wu", []float64{21.3793945312, 21.3848876953, 31.9372558594, 31.9482421875}},
{"qk0jrj", []float64{-21.5496826172, -21.5441894531, 101.557617188, 101.568603516}},
{"8x6jjpm0", []float64{41.6999816895, 41.7001533508, -154.460906982, -154.46056366}},
{"5j1etu", []float64{-61.2377929688, -61.2322998047, -42.6379394531, -42.626953125}},
{"r6b", []float64{-29.53125, -28.125, 146.25, 147.65625}},
{"ddu3vyj07", []float64{15.8093690872, 15.8094120026, -61.263756752, -61.2637138367}},
{"m9fm5q2d91", []float64{-34.2425769567, -34.2425715923, 70.8076143265, 70.8076250553}},
{"0pxdx", []float64{-47.373046875, -47.3291015625, -169.145507812, -169.1015625}},
{"w", []float64{0.0, 45.0, 90.0, 135.0}},
{"q1e", []float64{-36.5625, -35.15625, 94.21875, 95.625}},
{"h3vxhm8tu", []float64{-78.8945817947, -78.8945388794, 19.172000885, 19.1720438004}},
{"bcxsz", []float64{54.2724609375, 54.31640625, -135.395507812, -135.3515625}},
{"crjh", []float64{85.078125, 85.25390625, -116.71875, -116.3671875}},
{"bdqejqqgwj", []float64{58.2185536623, 58.2185590267, -148.119134903, -148.119124174}},
{"x7zhc480u7", []float64{21.9425886869, 21.9425940514, 156.137877703, 156.137888432}},
{"xhr7c9nd6js9", []float64{24.5713387616, 24.5713389292, 145.270248726, 145.270249061}},
{"f25r3r", []float64{46.3128662109, 46.318359375, -74.1247558594, -74.1137695312}},
{"b4v1e8zek", []float64{60.7370996475, 60.7371425629, -172.804470062, -172.804427147}},
{"95cwh1k", []float64{22.1553039551, 22.1566772461, -132.709350586, -132.707977295}},
{"kh1r", []float64{-21.26953125, -21.09375, 1.7578125, 2.109375}},
{"7p", []float64{-5.625, 0.0, -45.0, -33.75}},
{"mgsvj", []float64{-24.43359375, -24.3896484375, 85.6494140625, 85.693359375}},
{"k70", []float64{-28.125, -26.71875, 11.25, 12.65625}},
{"pxjr5g", []float64{-49.3780517578, -49.3725585938, 165.047607422, 165.05859375}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"81pv", []float64{6.50390625, 6.6796875, -169.1015625, -168.75}},
{"jjg", []float64{-57.65625, -56.25, 49.21875, 50.625}},
{"732kjtvw", []float64{-37.2330093384, -37.232837677, -33.1491851807, -33.1488418579}},
{"kuc2", []float64{-18.28125, -18.10546875, 35.5078125, 35.859375}},
{"wn91fmw18yp", []float64{36.9006192684, 36.9006206095, 91.5134082735, 91.5134096146}},
{"5wdnzyz5r", []float64{-52.2133398056, -52.2132968903, -19.3370103836, -19.3369674683}},
{"m682wkeu80", []float64{-30.8241176605, -30.8241122961, 56.8813705444, 56.8813812733}},
{"r18jv9k", []float64{-35.5448913574, -35.5435180664, 135.247192383, 135.248565674}},
{"zr079yhvttr", []float64{85.0241656601, 85.0241670012, 146.685235351, 146.685236692}},
{"r4umz4vhvm", []float64{-28.5045593977, -28.5045540333, 141.291271448, 141.291282177}},
{"58gdwpzc3zs", []float64{-85.2989700437, -85.2989687026, -17.3037296534, -17.3037283123}},
{"64frgqpt0yj", []float64{-28.1350958347, -28.1350944936, -86.6827766597, -86.6827753186}},
{"8n18eckkw5", []float64{33.8455456495, 33.8455510139, -177.719736099, -177.71972537}},
{"mz326c81b1", []float64{-4.16625916958, -4.16625380516, 80.6286621094, 80.6286728382}},
{"hx", []float64{-50.625, -45.0, 22.5, 33.75}},
{"ush2juq", []float64{67.5233459473, 67.5247192383, 28.737487793, 28.738861084}},
{"bp6h7m8fe", []float64{86.5589618683, 86.5590047836, -177.04351902, -177.043476105}},
{"111", []float64{-84.375, -82.96875, -133.59375, -132.1875}},
{"m9hwzz", []float64{-38.1500244141, -38.14453125, 74.1687011719, 74.1796875}},
{"100u6e92zuk", []float64{-89.2335520685, -89.2335507274, -133.833394647, -133.833393306}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"9wbmdmhu", []float64{38.9636993408, 38.9638710022, -112.043037415, -112.042694092}},
{"9p1jg8k", []float64{40.3871154785, 40.3884887695, -133.434448242, -133.433074951}},
{"vqf6kw", []float64{83.3972167969, 83.4027099609, 59.6118164062, 59.6228027344}},
{"gw", []float64{78.75, 84.375, -22.5, -11.25}},
{"h49v9", []float64{-74.970703125, -74.9267578125, 2.5048828125, 2.548828125}},
{"23cmz", []float64{-34.1455078125, -34.1015625, -166.684570312, -166.640625}},
{"71", []float64{-39.375, -33.75, -45.0, -33.75}},
{"5x2kvmbu", []float64{-48.3515167236, -48.3513450623, -21.9166946411, -21.9163513184}},
{"1nywfjs8e", []float64{-50.8144283295, -50.8143854141, -125.765175819, -125.765132904}},
{"7u4vm3b9qy", []float64{-21.5672886372, -21.5672832727, -7.15112328529, -7.15111255646}},
{"rx4n750gn", []float64{-4.50937271118, -4.50932979584, 160.445623398, 160.445666313}},
{"9", []float64{0.0, 45.0, -135.0, -90.0}},
{"nxfyqng3", []float64{-45.2703666687, -45.2701950073, 116.635322571, 116.635665894}},
{"tgnt", []float64{17.75390625, 17.9296875, 87.890625, 88.2421875}},
{"qe2k5jtbm2c", []float64{-25.985365659, -25.9853643179, 112.991521508, 112.991522849}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"1jhq", []float64{-60.8203125, -60.64453125, -129.0234375, -128.671875}},
{"p874n4ubjm", []float64{-88.2270544767, -88.2270491123, 161.989170313, 161.989181042}},
{"91h2qc", []float64{5.67443847656, 5.67993164062, -128.726806641, -128.715820312}},
{"mzp8s0p", []float64{-5.537109375, -5.53573608398, 89.4822692871, 89.4836425781}},
{"ptp0rem", []float64{-61.8132019043, -61.8118286133, 167.680206299, 167.68157959}},
{"14", []float64{-78.75, -73.125, -135.0, -123.75}},
{"s4dq", []float64{15.1171875, 15.29296875, 3.1640625, 3.515625}},
{"uvs7", []float64{76.46484375, 76.640625, 39.7265625, 40.078125}},
{"wh9xq3mqh", []float64{26.5948104858, 26.5948534012, 92.3914146423, 92.3914575577}},
{"kz", []float64{-5.625, 0.0, 33.75, 45.0}},
{"s8t4hkb3", []float64{3.19032669067, 3.19049835205, 29.7183609009, 29.7187042236}},
{"3ry9w", []float64{-1.142578125, -1.0986328125, -114.345703125, -114.301757812}},
{"mf1wt5", []float64{-32.5909423828, -32.5854492188, 81.0791015625, 81.0900878906}},
{"e", []float64{0.0, 45.0, -45.0, 0.0}},
{"mh", []float64{-22.5, -16.875, 45.0, 56.25}},
{"75y3665k", []float64{-23.6748504639, -23.6746788025, -36.1075973511, -36.1072540283}},
{"sts", []float64{30.9375, 32.34375, 28.125, 29.53125}},
{"6fdmxb", []float64{-29.970703125, -29.9652099609, -52.7453613281, -52.734375}},
{"xcvf9qbx13jx", []float64{10.3214901499, 10.3214903176, 176.891616806, 176.891617142}},
{"n1r6pkxu86t", []float64{-82.5916823745, -82.5916810334, 100.524576455, 100.524577796}},
{"1m2p", []float64{-59.23828125, -59.0625, -123.75, -123.3984375}},
{"fz", []float64{84.375, 90.0, -56.25, -45.0}},
{"hgw", []float64{-70.3125, -68.90625, 42.1875, 43.59375}},
{"ssktp8e5hsub", []float64{24.7884432971, 24.7884434648, 29.1620342061, 29.1620345414}},
{"8wbw4", []float64{39.0234375, 39.0673828125, -156.708984375, -156.665039062}},
{"wbcdsqtpnkh9", []float64{4.69513194636, 4.69513211399, 126.053283289, 126.053283624}},
{"f0md", []float64{46.7578125, 46.93359375, -82.265625, -81.9140625}},
{"hngnmbt2pe4", []float64{-50.9298545122, -50.9298531711, 4.478969872, 4.4789712131}},
{"gbkn8cgjewxc", []float64{47.559420336, 47.5594205037, -5.58776054531, -5.58776021004}},
{"u", []float64{45.0, 90.0, 0.0, 45.0}},
{"b5", []float64{61.875, 67.5, -180.0, -168.75}},
{"w1r042nrqyk", []float64{7.0325280726, 7.0325294137, 99.951505065, 99.9515064061}},
{"tv6r1uuh1h", []float64{30.7885193825, 30.7885247469, 81.9965028763, 81.9965136051}},
{"r1hw8pqpr3", []float64{-38.1913465261, -38.1913411617, 141.336675882, 141.336686611}},
{"j2bcyrxdgj", []float64{-85.4319351912, -85.4319298267, 57.5897741318, 57.5897848606}},
{"m", []float64{-45.0, 0.0, 45.0, 90.0}},
{"qtxvgfvmrbf", []float64{-13.0357463658, -13.0357450247, 123.570777476, 123.570778817}},
{"jp", []float64{-50.625, -45.0, 45.0, 56.25}},
{"f76", []float64{63.28125, 64.6875, -75.9375, -74.53125}},
{"vz9", []float64{87.1875, 88.59375, 80.15625, 81.5625}},
{"wm", []float64{28.125, 33.75, 101.25, 112.5}},
{"c0wn5", []float64{48.8671875, 48.9111328125, -126.430664062, -126.38671875}},
{"7pn7whg", []float64{-4.9836730957, -4.98229980469, -35.943145752, -35.9417724609}},
{"s", []float64{0.0, 45.0, 0.0, 45.0}},
{"txwr3", []float64{43.4619140625, 43.505859375, 76.3330078125, 76.376953125}},
{"zc0", []float64{50.625, 52.03125, 168.75, 170.15625}},
{"sq7pru6gr", []float64{36.4545679092, 36.4546108246, 15.8134031296, 15.8134460449}},
{"nu", []float64{-67.5, -61.875, 123.75, 135.0}},
{"7dkt6vr", []float64{-31.3920593262, -31.3906860352, -16.0414123535, -16.0400390625}},
{"xm2uwefdyf1", []float64{30.3433477879, 30.343349129, 147.594056278, 147.59405762}},
{"mgmnc0", []float64{-25.5322265625, -25.5267333984, 85.8251953125, 85.8361816406}},
{"jj1shq", []float64{-61.1389160156, -61.1334228516, 47.2961425781, 47.3071289062}},
{"3", []float64{-45.0, 0.0, -135.0, -90.0}},
{"0p4y3p8tgr", []float64{-49.4841438532, -49.4841384888, -176.088041067, -176.088030338}},
{"gu", []float64{67.5, 73.125, -11.25, 0.0}},
{"e94", []float64{5.625, 7.03125, -19.6875, -18.28125}},
{"u7khr", []float64{64.0283203125, 64.072265625, 17.1826171875, 17.2265625}},
{"k1k", []float64{-37.96875, -36.5625, 5.625, 7.03125}},
{"wks48m7f", []float64{25.7811355591, 25.7813072205, 106.891136169, 106.891479492}},
{"z91w3", []float64{51.7236328125, 51.767578125, 159.653320312, 159.697265625}},
{"c2d6xmbp1", []float64{48.284740448, 48.2847833633, -120.267291069, -120.267248154}},
{"s9yur", []float64{10.5908203125, 10.634765625, 32.2998046875, 32.34375}},
{"7u09b46", []float64{-22.1800231934, -22.1786499023, -10.544128418, -10.542755127}},
{"8sndxb", []float64{22.939453125, 22.9449462891, -148.018798828, -148.0078125}},
{"j2g761bhsex", []float64{-85.1995566487, -85.1995553076, 60.9084056318, 60.9084069729}},
{"5wg", []float64{-52.03125, -50.625, -18.28125, -16.875}},
{"fzn", []float64{84.375, 85.78125, -47.8125, -46.40625}},
{"ugdpz8p4mu", []float64{66.0502123833, 66.0502177477, 36.9019496441, 36.9019603729}},
{"nx", []float64{-50.625, -45.0, 112.5, 123.75}},
{"d", []float64{0.0, 45.0, -90.0, -45.0}},
{"crr9e8mz59se", []float64{86.0475053452, 86.0475055128, -113.041263744, -113.041263409}},
{"dgmpsg", []float64{19.6160888672, 19.6215820312, -49.0100097656, -48.9990234375}},
{"jcfk52m03", []float64{-79.4517087936, -79.4516658783, 82.063794136, 82.0638370514}},
{"d8trpccuk", []float64{4.05331134796, 4.05335426331, -59.7740364075, -59.7739934921}},
{"93g9665", []float64{10.0744628906, 10.0758361816, -118.725128174, -118.723754883}},
{"sqt7cuf8d0f", []float64{37.2478620708, 37.2478634119, 18.7132385373, 18.7132398784}},
{"f9", []float64{50.625, 56.25, -67.5, -56.25}},
{"k90", []float64{-39.375, -37.96875, 22.5, 23.90625}},
{"k8xdhcv", []float64{-41.8263244629, -41.8249511719, 33.2624816895, 33.2638549805}},
{"4989w4r926t7", []float64{-81.2862400152, -81.2862398475, -66.5228856727, -66.5228853375}},
{"c3", []float64{50.625, 56.25, -123.75, -112.5}},
{"bd908pg0", []float64{59.1929626465, 59.1931343079, -156.089630127, -156.089286804}},
{"bq", []float64{78.75, 84.375, -168.75, -157.5}},
{"chcdt", []float64{72.158203125, 72.2021484375, -132.670898438, -132.626953125}},
{"hff8vsrzhy39", []float64{-74.3748327903, -74.3748326227, 37.5181730837, 37.5181734189}},
{"9gef7g6ezj", []float64{20.101531148, 20.1015365124, -95.8080339432, -95.8080232143}},
{"yc0u2dp", []float64{51.3830566406, 51.3844299316, 124.836273193, 124.837646484}},
{"w0b41f7", []float64{4.58267211914, 4.58404541016, 90.0810241699, 90.0823974609}},
{"8cdmwjc", []float64{9.43588256836, 9.43725585938, -142.820892334, -142.819519043}},
{"p4ngqtjm2", []float64{-78.150343895, -78.1503009796, 144.785041809, 144.785084724}},
{"5", []float64{-90.0, -45.0, -45.0, 0.0}},
{"3qtzqdknx", []float64{-7.14961051941, -7.14956760406, -115.372624397, -115.372581482}},
{"gzjhuv9xmkg5", []float64{85.2414438687, 85.2414440364, -4.00772050023, -4.00772016495}},
{"8g8t7eh4y0fh", []float64{20.6273078173, 20.627307985, -145.387313068, -145.387312733}},
{"39w", []float64{-36.5625, -35.15625, -104.0625, -102.65625}},
{"z34rj8", []float64{51.85546875, 51.8609619141, 149.655761719, 149.666748047}},
{"c9p0zv2", []float64{50.7856750488, 50.7870483398, -102.315673828, -102.314300537}},
{"vh871y", []float64{70.8728027344, 70.8782958984, 45.4284667969, 45.439453125}},
{"7ggt8b4yfw", []float64{-22.9382622242, -22.9382568598, -6.29128217697, -6.29127144814}},
{"3qz3d324z", []float64{-6.76023960114, -6.76019668579, -113.455510139, -113.455467224}},
{"4sm2463w0w", []float64{-66.0803282261, -66.0803228617, -60.0162291527, -60.0162184238}},
{"26ewbu0gw3", []float64{-29.728397727, -29.7283923626, -163.793867826, -163.793857098}},
{"bre7js2w9z", []float64{87.7393430471, 87.7393484116, -163.937226534, -163.937215805}},
{"sy5ug08bn", []float64{34.5877075195, 34.5877504349, 39.1565608978, 39.1566038132}},
{"p4r", []float64{-77.34375, -75.9375, 144.84375, 146.25}},
{"qb", []float64{-45.0, -39.375, 123.75, 135.0}},
{"f4hj", []float64{57.12890625, 57.3046875, -84.375, -84.0234375}},
{"5r0f5t7d5", []float64{-50.2442550659, -50.2442121506, -32.5365686417, -32.5365257263}},
{"2j7n4r6r", []float64{-14.3730354309, -14.3728637695, -175.679283142, -175.678939819}},
{"wu92egv2wsy", []float64{25.4211013019, 25.421102643, 125.680104196, 125.680105537}},
{"vgtwey347bg", []float64{65.8648006618, 65.8648020029, 86.6507081687, 86.6507095098}},
{"q2meny7pbd18", []float64{-43.0307328701, -43.0307327025, 109.285149202, 109.285149537}},
{"rpe", []float64{-2.8125, -1.40625, 139.21875, 140.625}},
{"m69", []float64{-30.9375, -29.53125, 57.65625, 59.0625}},
{"w1zwd8", []float64{10.986328125, 10.9918212891, 100.656738281, 100.667724609}},
{"fzpf", []float64{84.7265625, 84.90234375, -45.3515625, -45.0}},
{"t3w", []float64{8.4375, 9.84375, 64.6875, 66.09375}},
{"zb11", []float64{45.17578125, 45.3515625, 170.15625, 170.5078125}},
{"r2prkmxpgtww", []float64{-43.6940126494, -43.6940124817, 156.641852036, 156.641852371}},
{"zr34g1zcj", []float64{86.274433136, 86.2744760513, 147.79894352, 147.798986435}},
{"19mgdk8", []float64{-82.3287963867, -82.3274230957, -104.315185547, -104.313812256}},
{"mkp", []float64{-22.5, -21.09375, 66.09375, 67.5}},
{"934qy86ssc", []float64{6.81367456913, 6.81367993355, -120.296655893, -120.296645164}},
{"byydj4mrm8", []float64{83.3339166641, 83.3339220285, -136.882202625, -136.882191896}},
{"j", []float64{-90.0, -45.0, 45.0, 90.0}},
{"9cjzqv5n6", []float64{6.92795276642, 6.92799568176, -92.8632259369, -92.8631830215}},
{"vkg1wg6s", []float64{72.0009613037, 72.0011329651, 60.7688140869, 60.7691574097}},
{"ynp42e9x", []float64{79.1659355164, 79.1661071777, 99.8677825928, 99.8681259155}},
{"uv9zwddtwn", []float64{77.2705686092, 77.2705739737, 36.5002727509, 36.5002834797}},
{"t17zkzszpm", []float64{8.3480912447, 8.34809660912, 50.4890120029, 50.4890227318}},
{"tuw779c04ukm", []float64{25.8934257366, 25.8934259042, 87.6943681017, 87.6943684369}},
{"37rm598", []float64{-25.8316040039, -25.8302307129, -113.400878906, -113.399505615}},
{"ymf18", []float64{77.607421875, 77.6513671875, 104.0625, 104.106445312}},
{"gd", []float64{56.25, 61.875, -22.5, -11.25}},
{"smz", []float64{32.34375, 33.75, 21.09375, 22.5}},
{"p", []float64{-90.0, -45.0, 135.0, 180.0}},
{"muzkh95w2u42", []float64{-17.5715374947, -17.571537327, 89.1479081288, 89.1479084641}},
{"hh53c48eg", []float64{-67.1780061722, -67.1779632568, 4.61507320404, 4.61511611938}},
{"739ewv", []float64{-35.9197998047, -35.9143066406, -31.3439941406, -31.3330078125}},
{"cw883", []float64{81.6064453125, 81.650390625, -111.752929688, -111.708984375}},
{"41xu1w37yf", []float64{-80.8243882656, -80.8243829012, -79.0336382389, -79.0336275101}},
{"0y750v2k", []float64{-54.2868804932, -54.2867088318, -141.997947693, -141.99760437}},
{"gqbgw", []float64{83.583984375, 83.6279296875, -32.431640625, -32.3876953125}},
{"pej", []float64{-73.125, -71.71875, 164.53125, 165.9375}},
{"r05t", []float64{-44.12109375, -43.9453125, 139.921875, 140.2734375}},
{"qfuew7wk4f", []float64{-28.8960921764, -28.896086812, 130.361484289, 130.361495018}},
{"nu357yhp72y", []float64{-65.4882533848, -65.4882520437, 125.326685607, 125.326686949}},
{"8qt7rnggz", []float64{37.1715116501, 37.1715545654, -161.054120064, -161.054077148}},
{"dhjq2nz", []float64{23.6357116699, 23.6370849609, -82.6075744629, -82.6062011719}},
{"5s4", []float64{-67.5, -66.09375, -19.6875, -18.28125}},
{"ge8nq842", []float64{65.7861328125, 65.7863044739, -22.211265564, -22.2109222412}},
{"71", []float64{-39.375, -33.75, -45.0, -33.75}},
{"sz59kteks87q", []float64{39.625713788, 39.6257139556, 38.8742895797, 38.874289915}},
{"ur2bh", []float64{85.78125, 85.8251953125, 12.48046875, 12.5244140625}},
{"w140u2f", []float64{5.76095581055, 5.76232910156, 93.0020141602, 93.0033874512}},
{"fpd3zkt6whz4", []float64{87.5202913955, 87.5202915631, -86.5098573267, -86.5098569915}},
{"zmej764h8kb8", []float64{76.8721358478, 76.8721360154, 150.614330247, 150.614330582}},
{"k4p6z", []float64{-33.2666015625, -33.22265625, 10.5029296875, 10.546875}},
{"f8", []float64{45.0, 50.625, -67.5, -56.25}},
{"utsy6pv17m", []float64{77.0789462328, 77.0789515972, 29.2745840549, 29.2745947838}},
{"6z5", []float64{-5.625, -4.21875, -52.03125, -50.625}},
{"mjdc1", []float64{-13.88671875, -13.8427734375, 48.9111328125, 48.955078125}},
{"gjks4c2", []float64{75.2412414551, 75.2426147461, -38.5510253906, -38.5496520996}},
{"fvkvvrh42ju", []float64{75.5808614194, 75.5808627605, -49.3341010809, -49.3340997398}},
{"yp63x30p7u7", []float64{86.0516823828, 86.0516837239, 93.4828309715, 93.4828323126}},
{"6rw", []float64{-2.8125, -1.40625, -70.3125, -68.90625}},
{"28vsqm8sb6", []float64{-40.0031411648, -40.0031358004, -149.490269423, -149.490258694}},
{"be72g2zcek5", []float64{63.4174847603, 63.4174861014, -152.776078731, -152.77607739}},
{"xry6fn699f", []float64{44.1117489338, 44.1117542982, 155.130461454, 155.130472183}},
{"3sf7bw01y4", []float64{-17.5888001919, -17.5887948275, -109.313707352, -109.313696623}},
{"k729yrqr77", []float64{-26.3700467348, -26.3700413704, 12.2365057468, 12.2365164757}},
{"e", []float64{0.0, 45.0, -45.0, 0.0}},
{"63x6dd", []float64{-36.1120605469, -36.1065673828, -68.4448242188, -68.4338378906}},
{"z", []float64{45.0, 90.0, 135.0, 180.0}},
{"mzyj", []float64{-0.52734375, -0.3515625, 87.1875, 87.5390625}},
{"3j6r", []float64{-14.23828125, -14.0625, -131.8359375, -131.484375}},
{"u3q", []float64{52.03125, 53.4375, 19.6875, 21.09375}},
{"nueu7mbnbn4", []float64{-63.9076530933, -63.9076517522, 129.166262448, 129.166263789}},
{"cyq2pkr", []float64{80.1795959473, 80.1809692383, -92.1327209473, -92.1313476562}},
{"gptzzke9hvh", []float64{88.5747224092, 88.5747237504, -36.5904432535, -36.5904419124}},
{"khd", []float64{-19.6875, -18.28125, 2.8125, 4.21875}},
{"ghm92hg6p5", []float64{69.1524285078, 69.1524338722, -37.2608613968, -37.260850668}},
{"n9e20w0", []float64{-81.5295410156, -81.5281677246, 117.092285156, 117.093658447}},
{"826dzs0k", []float64{1.91230773926, 1.91247940063, -164.904441833, -164.904098511}},
{"0d5f2", []float64{-78.3544921875, -78.310546875, -152.2265625, -152.182617188}},
{"70zsyg3", []float64{-39.9284362793, -39.9270629883, -34.1551208496, -34.1537475586}},
{"zykh8gwy", []float64{80.9675216675, 80.9676933289, 174.417228699, 174.417572021}},
{"4spd3s", []float64{-67.0825195312, -67.0770263672, -56.8872070312, -56.8762207031}},
{"r9p6f2t", []float64{-38.8888549805, -38.8874816895, 167.801055908, 167.802429199}},
{"6q3merq7w", []float64{-8.83652687073, -8.83648395538, -76.8405246735, -76.8404817581}},
{"qx1zr32", []float64{-4.34371948242, -4.34234619141, 115.279541016, 115.280914307}},
{"3zfnk", []float64{-0.3076171875, -0.263671875, -98.26171875, -98.2177734375}},
{"kd2e3", []float64{-31.7724609375, -31.728515625, 23.2470703125, 23.291015625}},
{"stkhr6", []float64{30.2893066406, 30.2947998047, 28.4436035156, 28.4545898438}},
{"nh4pzbm", []float64{-66.1363220215, -66.1349487305, 93.159942627, 93.161315918}},
{"zt8tf", []float64{76.9482421875, 76.9921875, 158.291015625, 158.334960938}},
{"gd37", []float64{58.18359375, 58.359375, -20.7421875, -20.390625}},
{"gnx5b45dd", []float64{82.2330951691, 82.2331380844, -35.1513576508, -35.1513147354}},
{"2qm7", []float64{-9.31640625, -9.140625, -161.3671875, -161.015625}},
{"4g0zf0kq9cpp", []float64{-71.7601996846, -71.760199517, -55.1015008986, -55.1015005633}},
{"977tgt", []float64{19.3194580078, 19.3249511719, -118.674316406, -118.663330078}},
{"md0k", []float64{-33.046875, -32.87109375, 67.8515625, 68.203125}},
{"v1q3nvfb3h", []float64{52.2386813164, 52.2386866808, 54.089512825, 54.0895235538}},
{"z96pt6u96w", []float64{53.3649623394, 53.3649677038, 160.549499989, 160.549510717}},
{"pu", []float64{-67.5, -61.875, 168.75, 180.0}},
{"6uydh", []float64{-17.9296875, -17.8857421875, -46.93359375, -46.8896484375}},
{"nx5mt4sk", []float64{-49.6437835693, -49.643611908, 117.295875549, 117.296218872}},
{"nk8jt", []float64{-63.720703125, -63.6767578125, 101.469726562, 101.513671875}},
{"kec1015b", []float64{-23.7249755859, -23.7248039246, 23.9113998413, 23.9117431641}},
{"fk388b", []float64{68.994140625, 68.9996337891, -76.6076660156, -76.5966796875}},
{"nsb", []float64{-63.28125, -61.875, 112.5, 113.90625}},
{"ndbws", []float64{-73.388671875, -73.3447265625, 113.37890625, 113.422851562}},
{"fs5", []float64{67.5, 68.90625, -63.28125, -61.875}},
{"h6x0kbec6p15", []float64{-75.8905554749, -75.8905553073, 21.3077272475, 21.3077275828}},
{"hy78", []float64{-54.84375, -54.66796875, 38.671875, 39.0234375}},
{"vpun9j4ce9", []float64{89.7640568018, 89.7640621662, 50.6728720665, 50.6728827953}},
{"6tyevvqrj665", []float64{-11.9670169987, -11.967016831, -58.0978783965, -58.0978780612}},
{"d3ktekr48n", []float64{8.02185416222, 8.02185952663, -72.2694396973, -72.2694289684}},
{"heyfm2up5", []float64{-68.5054206848, -68.5053777695, 32.2285223007, 32.2285652161}},
{"mn3f7qjr22v", []float64{-9.41403463483, -9.41403329372, 47.6109869778, 47.6109883189}},
{"1wngqx9b", []float64{-55.637512207, -55.6373405457, -102.719764709, -102.719421387}},
{"xc9jv49jej", []float64{9.46294605732, 9.46295142174, 170.3774786, 170.377489328}},
{"27", []float64{-28.125, -22.5, -168.75, -157.5}},
{"6yhqnw", []float64{-10.1623535156, -10.1568603516, -49.9877929688, -49.9768066406}},
{"rmhhu3qd", []float64{-16.0328292847, -16.0326576233, 152.07069397, 152.071037292}},
{"y00b8mhby", []float64{45.1154851913, 45.1155281067, 91.0724544525, 91.0724973679}},
{"yq5tr", []float64{79.6728515625, 79.716796875, 106.479492188, 106.5234375}},
{"cuvxw", []float64{73.037109375, 73.0810546875, -93.251953125, -93.2080078125}},
{"exvb4bcn", []float64{43.5988998413, 43.5990715027, -14.2918395996, -14.2914962769}},
{"uhdpsyx75n0", []float64{71.667112112, 71.6671134531, 3.03132534027, 3.03132668138}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"qvwbzgw7q", []float64{-13.9108800888, -13.9108371735, 133.591604233, 133.591647148}},
{"u0rqp0bres", []float64{47.466366291, 47.4663716555, 10.503423214, 10.5034339428}},
{"43k2u0vbnrbd", []float64{-82.8327522799, -82.8327521123, -72.5894909352, -72.5894905999}},
{"7r7kwz7xxr", []float64{-3.38658392429, -3.38657855988, -28.8779389858, -28.877928257}},
{"fu2f", []float64{69.2578125, 69.43359375, -55.1953125, -54.84375}},
{"9tkjsvzxw6", []float64{30.5309307575, 30.5309361219, -106.655691862, -106.655681133}},
{"y9x0z", []float64{53.5693359375, 53.61328125, 122.651367188, 122.6953125}},
{"y4rk4b25g", []float64{58.3613920212, 58.3614349365, 100.316290855, 100.316333771}},
{"sxmdmu9xcn", []float64{41.202839613, 41.2028449774, 30.4891633987, 30.4891741276}},
{"fb0", []float64{45.0, 46.40625, -56.25, -54.84375}},
{"7ffkxt5", []float64{-28.7127685547, -28.7113952637, -7.7522277832, -7.75085449219}},
{"n1x5jn4dr", []float64{-81.0018110275, -81.0017681122, 100.067210197, 100.067253113}},
{"uxcdctgmj", []float64{89.1095924377, 89.1096353531, 24.6799707413, 24.6800136566}},
{"h7", []float64{-73.125, -67.5, 11.25, 22.5}},
{"b", []float64{45.0, 90.0, -180.0, -135.0}},
{"us1n4udk", []float64{68.5800933838, 68.5802650452, 24.0301895142, 24.0305328369}},
{"zjtptsj2vn", []float64{77.2779929638, 77.2779983282, 142.280373573, 142.280384302}},
{"x6utsqz", []float64{16.4726257324, 16.4739990234, 152.774505615, 152.775878906}},
{"cs9dn901rqn", []float64{70.6698024273, 70.6698037684, -110.104661286, -110.104659945}},
{"sjbn", []float64{33.3984375, 33.57421875, 0.0, 0.3515625}},
{"0fwxjyc3g9q", []float64{-74.6696452796, -74.6696439385, -136.854814589, -136.854813248}},
{"fk", []float64{67.5, 73.125, -78.75, -67.5}},
{"75hq9jh", []float64{-26.9549560547, -26.9535827637, -38.9739990234, -38.9726257324}},
{"kr3hg1q1", []float64{-3.37675094604, -3.37657928467, 12.7963256836, 12.7966690063}},
{"hfq4d2wu", []float64{-76.9008636475, -76.9006919861, 42.2956466675, 42.2959899902}},
{"rg6ygh", []float64{-25.5102539062, -25.5047607422, 172.749023438, 172.760009766}},
{"995pvrg", []float64{7.02987670898, 7.03125, -108.046417236, -108.045043945}},
{"s5ys", []float64{21.796875, 21.97265625, 9.140625, 9.4921875}},
{"289ucubzj6", []float64{-41.3252341747, -41.3252288103, -154.960902929, -154.9608922}},
{"4", []float64{-90.0, -45.0, -90.0, -45.0}},
{"7g0e3gp7cz", []float64{-27.5365501642, -27.5365447998, -10.4599392414, -10.4599285126}},
{"9suuudg", []float64{27.5688171387, 27.5701904297, -105.618438721, -105.61706543}},
{"8vdt3j8zb0", []float64{31.8918943405, 31.8918997049, -142.689399719, -142.68938899}},
{"cf", []float64{56.25, 61.875, -101.25, -90.0}},
{"jnp33f5pr9", []float64{-56.0180372, -56.0180318356, 55.276658535, 55.2766692638}},
{"czgmgyb", []float64{89.6415710449, 89.6429443359, -96.5148925781, -96.5135192871}},
{"c1kk", []float64{52.734375, 52.91015625, -129.0234375, -128.671875}},
{"kfm4hfe8cp4s", []float64{-31.9782876223, -31.9782874547, 40.994843021, 40.9948433563}},
{"9mnws4hc8h", []float64{29.2788434029, 29.2788487673, -114.427070618, -114.427059889}},
{"t0j7chwg", []float64{0.684413909912, 0.684585571289, 52.4360275269, 52.4363708496}},
{"y", []float64{45.0, 90.0, 90.0, 135.0}},
{"suj", []float64{22.5, 23.90625, 40.78125, 42.1875}},
}
for _, test := range tests {
lat, lon := DecodeGeoHash(test.hash)
if !compareLatitude(test.box, lat) {
t.Errorf("expected lat %f, got %f, hash %s", (test.box[0]+test.box[1])/2, lat, test.hash)
}
if !compareLogitude(test.box, lon) {
t.Errorf("expected lon %f, got %f, hash %s", (test.box[2]+test.box[3])/2, lon, test.hash)
}
}
}
// compareLatitude reports whether v matches the latitude at the centre of
// box. The midpoint of box[0] and box[1] is passed to compareGeo together
// with v; a result of 0 from compareGeo counts as a match.
func compareLatitude(box []float64, v float64) bool {
	lo, hi := box[0], box[1]
	diff := compareGeo((lo+hi)/2, v)
	return diff == 0
}
// compareLogitude reports whether v matches the longitude at the centre of
// box. The midpoint of box[2] and box[3] is passed to compareGeo together
// with v; a result of 0 from compareGeo counts as a match.
func compareLogitude(box []float64, v float64) bool {
	lo, hi := box[2], box[3]
	diff := compareGeo((lo+hi)/2, v)
	return diff == 0
}
================================================
FILE: go.mod
================================================
module github.com/blevesearch/bleve/v2
go 1.24.0
require (
github.com/RoaringBitmap/roaring/v2 v2.14.5
github.com/bits-and-blooms/bitset v1.24.2
github.com/blevesearch/bleve_index_api v1.3.4
github.com/blevesearch/geo v0.2.5
github.com/blevesearch/go-faiss v1.0.28
github.com/blevesearch/go-metrics v0.0.0-20201227073835-cf1acfcdf475
github.com/blevesearch/go-porterstemmer v1.0.3
github.com/blevesearch/goleveldb v1.0.1
github.com/blevesearch/gtreap v0.1.1
github.com/blevesearch/scorch_segment_api/v2 v2.4.3
github.com/blevesearch/segment v0.9.1
github.com/blevesearch/snowball v0.6.1
github.com/blevesearch/snowballstem v0.9.0
github.com/blevesearch/stempel v0.2.0
github.com/blevesearch/upsidedown_store_api v1.0.2
github.com/blevesearch/vellum v1.2.0
github.com/blevesearch/zapx/v11 v11.4.3
github.com/blevesearch/zapx/v12 v12.4.3
github.com/blevesearch/zapx/v13 v13.4.3
github.com/blevesearch/zapx/v14 v14.4.3
github.com/blevesearch/zapx/v15 v15.4.3
github.com/blevesearch/zapx/v16 v16.3.1
github.com/blevesearch/zapx/v17 v17.0.4
github.com/couchbase/moss v0.2.0
github.com/spf13/cobra v1.10.2
go.etcd.io/bbolt v1.4.0
golang.org/x/text v0.22.0
google.golang.org/protobuf v1.36.6
)
require (
github.com/blevesearch/mmap-go v1.2.0 // indirect
github.com/couchbase/ghistogram v0.1.0 // indirect
github.com/golang/snappy v1.0.0 // indirect
github.com/inconshreveable/mousetrap v1.1.0 // indirect
github.com/json-iterator/go v0.0.0-20171115153421-f7279a603ede // indirect
github.com/mschoch/smat v0.2.0 // indirect
github.com/spf13/pflag v1.0.9 // indirect
golang.org/x/sys v0.40.0 // indirect
)
================================================
FILE: index/scorch/README.md
================================================
# scorch
## Definitions
Batch
- A collection of Documents to mutate in the index.
Document
- Has a unique identifier (arbitrary bytes).
- Is comprised of a list of fields.
Field
- Has a name (string).
- Has a type (text, number, date, geopoint).
- Has a value (depending on type).
- Can be indexed, stored, or both.
- If indexed, can be analyzed.
- If indexed, can optionally store term vectors.
## Scope
Scorch *MUST* implement the bleve.index API without requiring any changes to this API.
Scorch *MAY* introduce new interfaces, which can be discovered to allow use of new capabilities not in the current API.
## Implementation
The scorch implementation starts with the concept of a segmented index.
A segment is simply a slice, subset, or portion of the entire index. A segmented index is one which is composed of one or more segments. Although segments are created in a particular order, knowing this ordering is not required to achieve correct semantics when querying. Because no ordering is required, when searching an index you can (and should) search all the segments concurrently.
### Internal Wrapper
In order to accommodate the existing APIs while also improving the implementation, the scorch implementation includes some wrapper functionality that must be described.
#### \_id field
In scorch, field 0 is prearranged to be named \_id. All documents have a value for this field, which is the document's external identifier. In this version the field *MUST* be both indexed AND stored. The scorch wrapper adds this field, as it will not be present in the Document from the calling bleve code.
NOTE: If a document already contains a field \_id, it will be replaced. If this is problematic, the caller must ensure such a scenario does not happen.
### Proposed Structures
```go
type Segment interface {
Dictionary(field string) TermDictionary
}
type TermDictionary interface {
PostingsList(term string, excluding PostingsList) PostingsList
}
type PostingsList interface {
Next() Posting
And(other PostingsList) PostingsList
Or(other PostingsList) PostingsList
}
type Posting interface {
Number() uint64
Frequency() uint64
Norm() float64
Locations() Locations
}
type Locations interface {
Start() uint64
End() uint64
Pos() uint64
ArrayPositions() ...
}
type DeletedDocs {
}
type SegmentSnapshot struct {
segment Segment
deleted PostingsList
}
type IndexSnapshot struct {
segment []SegmentSnapshot
}
```
**What about errors?**
**What about memory management or context?**
**Postings List separate iterator to separate stateful from stateless**
### Mutating the Index
The bleve.index API has methods for directly making individual mutations (Update/Delete/SetInternal/DeleteInternal), however for this first implementation, we assume that all of these calls can simply be turned into a Batch of size 1. This may be highly inefficient, but it will be correct. This decision is made based on the fact that Couchbase FTS always uses Batches.
NOTE: As a side-effect of this decision, it should be clear that performance tuning may depend on the batch size, which may in-turn require changes in FTS.
From this point forward, only Batch mutations will be discussed.
Sequence of Operations:
1. For each document in the batch, search through all existing segments. The goal is to build up a per-segment bitset which tells us which documents in that segment are obsoleted by the addition of the new segment we're currently building. NOTE: we're not ready for this change to take effect yet, so rather than this operation mutating anything, they simply return bitsets, which we can apply later. Logically, this is something like:
```go
foreach segment {
dict := segment.Dictionary("\_id")
postings := empty postings list
foreach docID {
postings = postings.Or(dict.PostingsList(docID, nil))
}
}
```
NOTE: it is illustrated above as nested for loops, but some or all of these could be executed concurrently. The end result is that for each segment, we have a (possibly empty) bitset.
2. Also concurrent with 1, the documents in the batch are analyzed. This analysis proceeds using the existing analyzer pool.
3. (after 2 completes) Analyzed documents are fed into a function which builds a new Segment representing this information.
4. We now have everything we need to update the state of the system to include this new snapshot.
- Acquire a lock
- Create a new IndexSnapshot
- For each SegmentSnapshot in the IndexSnapshot, take the deleted PostingsList and OR it with the new postings list for this Segment. Construct a new SegmentSnapshot for the segment using this new deleted PostingsList. Append this SegmentSnapshot to the IndexSnapshot.
- Create a new SegmentSnapshot wrapping our new segment with nil deleted docs.
- Append the new SegmentSnapshot to the IndexSnapshot
- Release the lock
An ASCII art example:
```text
0 - Empty Index
No segments
IndexSnapshot
segments []
deleted []
1 - Index Batch [ A B C ]
segment 0
numbers [ 1 2 3 ]
\_id [ A B C ]
IndexSnapshot
segments [ 0 ]
deleted [ nil ]
2 - Index Batch [ B' ]
segment 0 1
numbers [ 1 2 3 ] [ 1 ]
\_id [ A B C ] [ B ]
Compute bitset segment-0-deleted-by-1:
[ 0 1 0 ]
OR it with previous (nil) (call it 0-1)
[ 0 1 0 ]
IndexSnapshot
segments [ 0 1 ]
deleted [ 0-1 nil ]
3 - Index Batch [ C' ]
segment 0 1 2
numbers [ 1 2 3 ] [ 1 ] [ 1 ]
\_id [ A B C ] [ B ] [ C ]
Compute bitset segment-0-deleted-by-2:
[ 0 0 1 ]
OR it with previous ([ 0 1 0 ]) (call it 0-12)
[ 0 1 1 ]
Compute bitset segment-1-deleted-by-2:
[ 0 ]
OR it with previous (nil)
still just nil
IndexSnapshot
segments [ 0 1 2 ]
deleted [ 0-12 nil nil ]
```
**is there opportunity to stop early when doc is found in one segment**
**also, more efficient way to find bits for long lists of ids?**
### Searching
In the bleve.index API all searching starts by getting an IndexReader, which represents a snapshot of the index at a point in time.
As described in the section above, our index implementation maintains a pointer to the current IndexSnapshot. When a caller gets an IndexReader, they get a copy of this pointer, and can use it as long as they like. The IndexSnapshot contains SegmentSnapshots, which only contain pointers to immutable segments. The deleted posting lists associated with a segment change over time, but the particular deleted posting list in YOUR snapshot is immutable. This gives a stable view of the data.
#### Term Search
Term search is the only searching primitive exposed in today's bleve.index API. This ultimately could limit our ability to take advantage of the indexing improvements, but it also means it will be easier to get a first version of this working.
A term search for term T in field F will look something like this:
```go
searchResultPostings = empty
foreach segment {
dict := segment.Dictionary(F)
segmentResultPostings = dict.PostingsList(T, segmentSnapshotDeleted)
// make segmentLocal numbers into global numbers, and flip bits in searchResultPostings
}
```
The searchResultPostings will be a new implementation of the TermFieldReader interface.
As a reminder this interface is:
```go
// TermFieldReader is the interface exposing the enumeration of documents
// containing a given term in a given field. Documents are returned in byte
// lexicographic order over their identifiers.
type TermFieldReader interface {
// Next returns the next document containing the term in this field, or nil
// when it reaches the end of the enumeration. The preAlloced TermFieldDoc
// is optional, and when non-nil, will be used instead of allocating memory.
Next(preAlloced *TermFieldDoc) (*TermFieldDoc, error)
// Advance resets the enumeration at specified document or its immediate
// follower.
Advance(ID IndexInternalID, preAlloced *TermFieldDoc) (*TermFieldDoc, error)
// Count returns the number of documents contains the term in this field.
Count() uint64
Close() error
}
```
At first glance this appears problematic, we have no way to return documents in order of their identifiers. But it turns out the wording of this is perhaps too strong, or a bit ambiguous. Originally, this referred to the external identifiers, but with the introduction of a distinction between internal/external identifiers, returning them in order of their internal identifiers is also acceptable. **ASIDE**: the reason for this is that most callers just use Next() and literally don't care what the order is, they could be in any order and it would be fine. There is only one searcher that cares and that is the ConjunctionSearcher, which relies on Next/Advance having very specific semantics. Later in this document we will have a proposal to split into multiple interfaces:
- The weakest interface, only supports Next() no ordering at all.
- Ordered, supporting Advance()
- And/Or'able capable of internally efficiently doing these ops with like interfaces (if not capable then can always fall back to external walking)
But, the good news is that we don't even have to do that for our first implementation. As long as the global numbers we use for internal identifiers are consistent within this IndexSnapshot, then Next() will be ordered by ascending document number, and Advance() will still work correctly.
NOTE: there is another place where we rely on the ordering of these hits, and that is in the "\_id" sort order. Previously this was the natural order, and a NOOP for the collector, now it must be implemented by actually sorting on the "\_id" field. We probably should introduce at least a marker interface to detect this.
An ASCII art example:
```text
Let's start with the IndexSnapshot we ended with earlier:
3 - Index Batch [ C' ]
segment 0 1 2
numbers [ 1 2 3 ] [ 1 ] [ 1 ]
\_id [ A B C ] [ B ] [ C ]
Compute bitset segment-0-deleted-by-2:
[ 0 0 1 ]
OR it with previous ([ 0 1 0 ]) (call it 0-12)
[ 0 1 1 ]
Compute bitset segment-1-deleted-by-2:
[ 0 ]
OR it with previous (nil)
still just nil
IndexSnapshot
segments [ 0 1 2 ]
deleted [ 0-12 nil nil ]
Now let's search for the term 'cat' in the field 'desc' and let's assume that Document C (both versions) would match it.
Concurrently:
- Segment 0
- Get Term Dictionary For Field 'desc'
- From it get Postings List for term 'cat' EXCLUDING 0-12
- raw segment matches [ 0 0 1 ] but excluding [ 0 1 1 ] gives [ 0 0 0 ]
- Segment 1
- Get Term Dictionary For Field 'desc'
- From it get Postings List for term 'cat' excluding nil
- [ 0 ]
- Segment 2
- Get Term Dictionary For Field 'desc'
- From it get Postings List for term 'cat' excluding nil
- [ 1 ]
Map local bitsets into global number space (global meaning cross-segment but still unique to this snapshot)
IndexSnapshot already should have mapping something like:
0 - Offset 0
1 - Offset 3 (because segment 0 had 3 docs)
2 - Offset 4 (because segment 1 had 1 doc)
This maps to search result bitset:
[ 0 0 0 0 1]
Caller would call Next() and get doc number 5 (assuming 1 based indexing for now)
Caller could then ask to get term locations, stored fields, external doc ID for document number 5. Internally in the IndexSnapshot, we can now convert that back, and realize doc number 5 comes from segment 2, 5-4=1 so we're looking for doc number 1 in segment 2. That happens to be C...
```
#### Future improvements
In the future, interfaces to detect these non-serially operating TermFieldReaders could expose their own And() and Or() up to the higher level Conjunction/Disjunction searchers. Doing this alone offers some win, but also means there would be greater burden on the Searcher code rewriting logical expressions for maximum performance.
Another related topic is that of peak memory usage. With serially operating TermFieldReaders it was necessary to start them all at the same time and operate in unison. However, with these non-serially operating TermFieldReaders we have the option of doing a few at a time, consolidating them, disposing of the intermediaries, and then doing a few more. For very complex queries with many clauses this could reduce peak memory usage.
### Memory Tracking
All segments must be able to produce two statistics, an estimate of their explicit memory usage, and their actual size on disk (if any). For in-memory segments, disk usage could be zero, and the memory usage represents the entire information content. For mmap-based disk segments, the memory could be as low as the size of tracking structure itself (say just a few pointers).
This would allow the implementation to throttle or block incoming mutations when a threshold memory usage has (or would be) exceeded.
### Persistence
Obviously, we want to support (but maybe not require) asynchronous persistence of segments. My expectation is that segments are initially built in memory. At some point they are persisted to disk. This poses some interesting challenges.
At runtime, the state of an index (its IndexSnapshot) is not only the contents of the segments, but also the bitmasks of deleted documents. These bitmasks indirectly encode an ordering in which the segments were added. The reason is that the bitmasks encode which items have been obsoleted by other (subsequent or more future) segments. In the runtime implementation we compute bitmask deltas and then merge them at the same time we bring the new segment in. One idea is that we could take a similar approach on disk. When we persist a segment, we persist the bitmask deltas of segments known to exist at that time, and eventually these can get merged up into a base segment deleted bitmask.
This also relates to the topic of rollback, addressed next...
### Rollback
One desirable property in the Couchbase ecosystem is the ability to rollback to some previous (though typically not long ago) state. One idea for keeping this property in this design is to protect some of the most recent segments from merging. Then, if necessary, they could be "undone" to reveal previous states of the system. In these scenarios "undone" has to properly undo the deleted bitmasks on the other segments. Again, the current thinking is that rather than "undo" anything, it could be work that was deferred in the first place, thus making it easier to logically undo.
Another possibly related approach would be to tie this into our existing snapshot mechanism. Perhaps simulating a slow reader (holding onto index snapshots) for some period of time, can be the mechanism to achieve the desired end goal.
### Internal Storage
The bleve.index API has support for "internal storage". The ability to store information under a separate name space.
This is not used for high volume storage, so it is tempting to think we could just put a small k/v store alongside the rest of the index. But, the reality is that this storage is used to maintain key information related to the rollback scenario. Because of this, it's crucial that ordering and overwriting of key/value pairs correspond with actual segment persistence in the index. Based on this, I believe it's important to put the internal key/value pairs inside the segments themselves. But, this also means that they must follow a similar "deleted" bitmask approach to obsolete values in older segments. But, this also seems to substantially increase the complexity of the solution because of the separate name space, it would appear to require its own bitmask. Further keys aren't numeric, which then implies yet another mapping from internal key to number, etc.
More thought is required here.
### Merging
The segmented index approach requires merging to prevent the number of segments from growing too large.
Recent experience with LSMs has taught us that having the correct merge strategy can make a huge difference in the overall performance of the system. In particular, a simple merge strategy which merges segments too aggressively can lead to high write amplification and unnecessarily rendering cached data useless.
A few simple principles have been identified.
- Roughly we merge multiple smaller segments into a single larger one.
- The larger a segment gets the less likely we should be to ever merge it.
- Segments with large numbers of deleted/obsoleted items are good candidates as the merge will result in a space savings.
- Segments with all items deleted/obsoleted can be dropped.
Merging of a segment should be able to proceed even if that segment is held by an ongoing snapshot, it should only delay the removal of it.
================================================
FILE: index/scorch/builder.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"os"
"sync"
"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
bolt "go.etcd.io/bbolt"
)
// DefaultBuilderBatchSize is the batchSize a new Builder starts with; a
// positive "batchSize" config value replaces it.
const DefaultBuilderBatchSize = 1000
// DefaultBuilderMergeMax is the mergeMax a new Builder starts with; a
// positive "mergeMax" config value replaces it.
const DefaultBuilderMergeMax = 10
// Builder assembles a scorch index offline: documents are collected into
// batches, each batch is persisted as a segment file in a temporary build
// directory, and Close merges the segments and writes the final index.
type Builder struct {
// m is held by Index and Close for their whole duration.
m sync.Mutex
// segCount is the number handed to zapFileName for the next segment file;
// it is incremented for every batch segment and merged segment.
segCount uint64
// path is the destination directory of the finished index.
path string
// buildPath is the temporary directory holding intermediate segments.
buildPath string
// segPaths lists the segment files in buildPath that still await merging.
segPaths []string
// batchSize is the number of pending documents that triggers a flush.
batchSize int
// mergeMax is the number of segments merged together in one merge step.
mergeMax int
// batch accumulates documents until the next flush.
batch *index.Batch
// internal is stored as the internal key/value data of the index snapshot
// written by Close.
internal map[string][]byte
// segPlugin creates, opens and merges segments.
segPlugin SegmentPlugin
}
// NewBuilder creates a Builder from the supplied config map.
//
// The "path" entry (a string) is required and names the directory the
// finished index is written to. The optional "buildPathPrefix" entry (a
// string) is the parent directory handed to os.MkdirTemp for the temporary
// build directory. The remaining entries are interpreted by parseConfig.
//
// An error is returned when "path" is missing, when the temporary build
// directory cannot be created, or when the config cannot be parsed. In the
// last case the freshly created build directory is removed again, since the
// caller never receives a Builder that could clean it up.
func NewBuilder(config map[string]interface{}) (*Builder, error) {
	path, ok := config["path"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify path")
	}

	buildPathPrefix, _ := config["buildPathPrefix"].(string)
	buildPath, err := os.MkdirTemp(buildPathPrefix, "scorch-offline-build")
	if err != nil {
		return nil, err
	}

	rv := &Builder{
		path:      path,
		buildPath: buildPath,
		mergeMax:  DefaultBuilderMergeMax,
		batchSize: DefaultBuilderBatchSize,
		batch:     index.NewBatch(),
		segPlugin: defaultSegmentPlugin,
	}

	err = rv.parseConfig(config)
	if err != nil {
		// best effort: the parse error is what the caller needs to see,
		// a failure to clean up the empty temp dir is secondary
		_ = os.RemoveAll(buildPath)
		return nil, fmt.Errorf("error parsing builder config: %w", err)
	}

	return rv, nil
}
// parseConfig applies the optional settings found in config to the builder.
//
// Recognized keys:
//   - "mergeMax" and "batchSize": parsed with parseToInteger; a value that
//     fails to parse is an error, a non-positive value is ignored.
//   - "internal": used only when it is a map[string][]byte.
//   - the forced segment type/version read by configForceSegmentTypeVersion;
//     the segment plugin is replaced only when both are set.
func (o *Builder) parseConfig(config map[string]interface{}) (err error) {
	if raw, found := config["mergeMax"]; found {
		n, perr := parseToInteger(raw)
		if perr != nil {
			return fmt.Errorf("mergeMax parse err: %v", perr)
		}
		if n > 0 {
			o.mergeMax = n
		}
	}

	if raw, found := config["batchSize"]; found {
		n, perr := parseToInteger(raw)
		if perr != nil {
			return fmt.Errorf("batchSize parse err: %v", perr)
		}
		if n > 0 {
			o.batchSize = n
		}
	}

	if raw, found := config["internal"]; found {
		if kv, isMap := raw.(map[string][]byte); isMap {
			o.internal = kv
		}
	}

	segType, segVersion, err := configForceSegmentTypeVersion(config)
	if err != nil {
		return err
	}
	// keep the default plugin unless both type and version were forced
	if segType == "" || segVersion == 0 {
		return nil
	}

	plugin, err := chooseSegmentPlugin(segType, uint32(segVersion))
	if err != nil {
		return err
	}
	o.segPlugin = plugin
	return nil
}
// Index queues doc for inclusion in the index and flushes the pending batch
// to a new segment once it holds batchSize documents.
// Indexing the same document more than once is invalid.
func (o *Builder) Index(doc index.Document) error {
	o.m.Lock()
	defer o.m.Unlock()

	o.batch.Update(doc)
	if err := o.maybeFlushBatchLOCKED(o.batchSize); err != nil {
		return err
	}
	return nil
}
// maybeFlushBatchLOCKED turns the pending batch into a segment when it holds
// at least moreThan documents, and is a no-op otherwise. The batch is reset
// after the flush attempt whether or not it succeeded.
// The caller must hold o.m.
func (o *Builder) maybeFlushBatchLOCKED(moreThan int) error {
	if len(o.batch.IndexOps) < moreThan {
		return nil
	}
	defer o.batch.Reset()
	return o.executeBatchLOCKED(o.batch)
}
// executeBatchLOCKED analyzes every non-nil document of batch, builds a new
// segment from them, persists it into the build directory and records its
// path in o.segPaths. The caller must hold o.m.
func (o *Builder) executeBatchLOCKED(batch *index.Batch) (err error) {
	analyzed := make([]index.Document, 0, len(batch.IndexOps))
	for _, d := range batch.IndexOps {
		if d == nil {
			continue
		}
		// the _id field has to be present before analysis runs
		d.AddIDField()
		analyze(d, nil)
		analyzed = append(analyzed, d)
	}

	newSeg, _, err := o.segPlugin.New(analyzed)
	if err != nil {
		return fmt.Errorf("error building segment base: %v", err)
	}

	segPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
	o.segCount++

	unpersisted, canPersist := newSeg.(segment.UnpersistedSegment)
	if !canPersist {
		return fmt.Errorf("new segment does not implement unpersisted: %T", newSeg)
	}

	if err = unpersisted.Persist(segPath); err != nil {
		return fmt.Errorf("error persisting segment base to %s: %v", segPath, err)
	}
	o.segPaths = append(o.segPaths, segPath)
	return nil
}
// doMerge repeatedly merges the segments listed in o.segPaths until at most
// one remains. Each pass takes up to mergeMax segments from the front of the
// list, merges them into a new segment file in the build directory, appends
// that file to the list and deletes the inputs.
//
// A merge always consumes at least two segments: with a merge width of one,
// every pass would replace one segment by one new segment and the list would
// never get shorter.
//
// On error the function returns immediately; segments already removed from
// o.segPaths for the failed pass are not put back.
func (o *Builder) doMerge() error {
	// as long as we have more than 1 segment, keep merging
	for len(o.segPaths) > 1 {
		// merge the next number of segments into one new one
		// or, if there are fewer than remaining, merge them all
		mergeCount := o.mergeMax
		if mergeCount < 2 {
			// guarantees progress; safe because len(o.segPaths) >= 2 here
			mergeCount = 2
		}
		if mergeCount > len(o.segPaths) {
			mergeCount = len(o.segPaths)
		}

		mergePaths := o.segPaths[0:mergeCount]
		o.segPaths = o.segPaths[mergeCount:]

		// open each of the segments to be merged
		mergeSegs := make([]segment.Segment, 0, mergeCount)

		// closeOpenedSegs attempts to close all opened
		// segments even if an error occurs, in which case
		// the first error is returned
		closeOpenedSegs := func() error {
			var err error
			for _, seg := range mergeSegs {
				clErr := seg.Close()
				if clErr != nil && err == nil {
					err = clErr
				}
			}
			return err
		}

		for _, mergePath := range mergePaths {
			seg, err := o.segPlugin.Open(mergePath)
			if err != nil {
				// the open error is the one worth reporting
				_ = closeOpenedSegs()
				return fmt.Errorf("error opening segment (%s) for merge: %w", mergePath, err)
			}
			mergeSegs = append(mergeSegs, seg)
		}

		// do the merge; the nil bitmaps in drops mean no document is dropped
		mergedSegPath := o.buildPath + string(os.PathSeparator) + zapFileName(o.segCount)
		drops := make([]*roaring.Bitmap, mergeCount)
		_, _, err := o.segPlugin.Merge(mergeSegs, drops, mergedSegPath, nil, nil)
		if err != nil {
			// the merge error is the one worth reporting
			_ = closeOpenedSegs()
			return fmt.Errorf("error merging segments (%v): %w", mergePaths, err)
		}
		o.segCount++
		o.segPaths = append(o.segPaths, mergedSegPath)

		// close segments opened for merge
		err = closeOpenedSegs()
		if err != nil {
			return fmt.Errorf("error closing opened segments: %w", err)
		}

		// remove merged segments
		for _, mergePath := range mergePaths {
			err = os.RemoveAll(mergePath)
			if err != nil {
				return fmt.Errorf("error removing segment %s after merge: %w", mergePath, err)
			}
		}
	}

	return nil
}
// Close finishes the offline build and writes the index into o.path.
//
// It flushes any partially filled batch, merges all segments into one, moves
// that segment into o.path, removes the temporary build directory and writes
// a root.bolt file describing a snapshot that contains the single segment.
//
// An error is returned when no document was ever indexed (there is no
// segment to finalize) or when any of the steps above fails. The final
// segment and the bolt database are closed on every error path.
func (o *Builder) Close() error {
	o.m.Lock()
	defer o.m.Unlock()

	// see if there is a partial batch
	err := o.maybeFlushBatchLOCKED(1)
	if err != nil {
		return fmt.Errorf("error flushing batch before close: %w", err)
	}

	// perform all the merging
	err = o.doMerge()
	if err != nil {
		return fmt.Errorf("error while merging: %w", err)
	}

	// without any indexed document there is no segment to move into place
	if len(o.segPaths) == 0 {
		// best effort: the temporary directory holds nothing of value
		_ = os.RemoveAll(o.buildPath)
		return fmt.Errorf("no documents were indexed, there is no segment to finalize")
	}

	// ensure the store path exists
	err = os.MkdirAll(o.path, 0700)
	if err != nil {
		return err
	}

	// move final segment into place
	// segment id 2 is chosen to match the behavior of a scorch
	// index which indexes a single batch of data
	finalSegPath := o.path + string(os.PathSeparator) + zapFileName(2)
	err = os.Rename(o.segPaths[0], finalSegPath)
	if err != nil {
		return fmt.Errorf("error moving final segment into place: %w", err)
	}

	// remove the buildPath, as it is no longer needed
	err = os.RemoveAll(o.buildPath)
	if err != nil {
		return fmt.Errorf("error removing build path: %w", err)
	}

	// prepare wrapping
	seg, err := o.segPlugin.Open(finalSegPath)
	if err != nil {
		return fmt.Errorf("error opening final segment: %w", err)
	}

	// create a segment snapshot for this segment
	ss := &SegmentSnapshot{
		segment: seg,
	}
	is := &IndexSnapshot{
		epoch:    3, // chosen to match scorch behavior when indexing a single batch
		segment:  []*SegmentSnapshot{ss},
		creator:  "scorch-builder",
		internal: o.internal,
	}

	// create the root bolt
	rootBoltPath := o.path + string(os.PathSeparator) + "root.bolt"
	rootBolt, err := bolt.Open(rootBoltPath, 0600, nil)
	if err != nil {
		// the open error is the one worth reporting
		_ = seg.Close()
		return err
	}

	// start a write transaction
	tx, err := rootBolt.Begin(true)
	if err != nil {
		// the begin error is the one worth reporting
		_ = rootBolt.Close()
		_ = seg.Close()
		return err
	}

	// fill the root bolt with this fake index snapshot
	_, _, err = prepareBoltSnapshot(is, tx, o.path, o.segPlugin, nil, nil)
	if err != nil {
		// the snapshot error is the one worth reporting
		_ = tx.Rollback()
		_ = rootBolt.Close()
		_ = seg.Close()
		return fmt.Errorf("error preparing bolt snapshot in root.bolt: %w", err)
	}

	// commit bolt data
	err = tx.Commit()
	if err != nil {
		// the commit error is the one worth reporting
		_ = rootBolt.Close()
		_ = seg.Close()
		return fmt.Errorf("error committing bolt tx in root.bolt: %w", err)
	}

	// close bolt
	err = rootBolt.Close()
	if err != nil {
		// the bolt close error is the one worth reporting
		_ = seg.Close()
		return fmt.Errorf("error closing root.bolt: %w", err)
	}

	// close final segment
	err = seg.Close()
	if err != nil {
		return fmt.Errorf("error closing final segment: %w", err)
	}

	return nil
}
================================================
FILE: index/scorch/builder_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"context"
"fmt"
"os"
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// TestBuilder indexes a number of documents that is an exact multiple of the
// batch size, closes the builder and verifies the resulting index.
func TestBuilder(t *testing.T) {
	const docCount = 10

	tmpDir, err := os.MkdirTemp("", "scorch-builder-test")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if rmErr := os.RemoveAll(tmpDir); rmErr != nil {
			t.Fatalf("error cleaning up test index: %v", rmErr)
		}
	}()

	// small batch and merge sizes force several flushes and merge rounds
	b, err := NewBuilder(map[string]interface{}{
		"path":      tmpDir,
		"batchSize": 2,
		"mergeMax":  2,
	})
	if err != nil {
		t.Fatal(err)
	}

	for i := 0; i < docCount; i++ {
		doc := document.NewDocument(fmt.Sprintf("%d", i))
		doc.AddField(document.NewTextField("name", nil, []byte("hello")))
		if err = b.Index(doc); err != nil {
			t.Fatal(err)
		}
	}

	if err = b.Close(); err != nil {
		t.Fatal(err)
	}

	checkIndex(t, tmpDir, []byte("hello"), "name", docCount)
}
// checkIndex opens the scorch index stored at path and verifies that it
// holds expectCount documents and that exactly expectCount of them contain
// term in field. The index, its reader and the term field reader are all
// closed before the function returns.
func checkIndex(t *testing.T, path string, term []byte, field string, expectCount int) {
	// report failures at the caller's line
	t.Helper()

	cfg := make(map[string]interface{})
	cfg["path"] = path
	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatalf("error closing index: %v", cerr)
		}
	}()

	r, err := idx.Reader()
	if err != nil {
		t.Fatalf("error accessing index reader: %v", err)
	}
	defer func() {
		if cerr := r.Close(); cerr != nil {
			t.Fatalf("error closing reader: %v", cerr)
		}
	}()

	// check the document count
	count, err := r.DocCount()
	if err != nil {
		t.Errorf("error accessing index doc count: %v", err)
	} else if count != uint64(expectCount) {
		t.Errorf("expected %d docs, got %d", expectCount, count)
	}

	// run a search for the term
	tfr, err := r.TermFieldReader(context.TODO(), term, field, false, false, false)
	if err != nil {
		t.Errorf("error accessing term field reader: %v", err)
		return
	}
	// deferred last, so it runs before the reader and the index are closed
	defer func() {
		if cerr := tfr.Close(); cerr != nil {
			t.Errorf("error closing term field reader: %v", cerr)
		}
	}()

	var rows int
	tfd, err := tfr.Next(nil)
	for err == nil && tfd != nil {
		rows++
		tfd, err = tfr.Next(nil)
	}
	if err != nil {
		t.Errorf("error calling next on term field reader: %v", err)
	}
	if rows != expectCount {
		t.Errorf("expected %d rows for term %s, field %s, got %d", expectCount, term, field, rows)
	}
}
// TestBuilderFlushFinalBatch indexes a number of documents that is not a
// multiple of the batch size, so the last batch is only partially filled
// when Close is called, and verifies that all documents reach the index.
func TestBuilderFlushFinalBatch(t *testing.T) {
	const docCount = 9

	tmpDir, err := os.MkdirTemp("", "scorch-builder-test")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if rmErr := os.RemoveAll(tmpDir); rmErr != nil {
			t.Fatalf("error cleaning up test index: %v", rmErr)
		}
	}()

	b, err := NewBuilder(map[string]interface{}{
		"path":      tmpDir,
		"batchSize": 2,
		"mergeMax":  2,
	})
	if err != nil {
		t.Fatal(err)
	}

	for i := 0; i < docCount; i++ {
		doc := document.NewDocument(fmt.Sprintf("%d", i))
		doc.AddField(document.NewTextField("name", nil, []byte("hello")))
		if err = b.Index(doc); err != nil {
			t.Fatal(err)
		}
	}

	if err = b.Close(); err != nil {
		t.Fatal(err)
	}

	checkIndex(t, tmpDir, []byte("hello"), "name", docCount)
}
================================================
FILE: index/scorch/empty.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import segment "github.com/blevesearch/scorch_segment_api/v2"
// emptyPostingsIterator is a postings iterator that never yields a posting
// and reports zero for every size and byte counter.
type emptyPostingsIterator struct{}
// Next always returns a nil Posting and a nil error.
func (e *emptyPostingsIterator) Next() (segment.Posting, error) {
return nil, nil
}
// Advance ignores its argument and always returns a nil Posting and a nil
// error.
func (e *emptyPostingsIterator) Advance(uint64) (segment.Posting, error) {
return nil, nil
}
// Size always returns 0.
func (e *emptyPostingsIterator) Size() int {
return 0
}
// BytesRead always returns 0.
func (e *emptyPostingsIterator) BytesRead() uint64 {
return 0
}
// ResetBytesRead does nothing; the iterator keeps no counter.
func (e *emptyPostingsIterator) ResetBytesRead(uint64) {}
// BytesWritten always returns 0.
func (e *emptyPostingsIterator) BytesWritten() uint64 { return 0 }
// anEmptyPostingsIterator is a package-level instance of the stateless
// emptyPostingsIterator.
var anEmptyPostingsIterator = &emptyPostingsIterator{}
================================================
FILE: index/scorch/event.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import "time"
// RegistryAsyncErrorCallbacks maps a callback name to a function taking an
// error and a string. It should be treated as read-only after process
// init()'ialization.
// NOTE(review): the meaning of the string argument is not visible in this
// file — confirm against the call sites.
var RegistryAsyncErrorCallbacks = map[string]func(error, string){}
// RegistryEventCallbacks maps a callback name to an event handler. It should
// be treated as read-only after process init()'ialization.
// In the event of not having a callback, these return true.
var RegistryEventCallbacks = map[string]func(Event) bool{}
// Event represents the information provided in an OnEvent() callback.
type Event struct {
// Kind identifies which event is being reported.
Kind EventKind
// Scorch is the index associated with the event.
// NOTE(review): presumably the instance that fired it — confirm.
Scorch *Scorch
// Duration is a time span attached to the event.
// NOTE(review): what it measures for each EventKind is not visible here — confirm.
Duration time.Duration
}
// EventKind represents an event code for OnEvent() callbacks.
type EventKind int
// The event kinds are numbered consecutively from zero in declaration order.
const (
// EventKindCloseStart is fired when a Scorch.Close() has begun.
EventKindCloseStart EventKind = iota
// EventKindClose is fired when a scorch index has been fully closed.
EventKindClose
// EventKindMergerProgress is fired when the merger has completed a
// round of merge processing.
EventKindMergerProgress
// EventKindPersisterProgress is fired when the persister has completed
// a round of persistence processing.
EventKindPersisterProgress
// EventKindBatchIntroductionStart is fired when Batch() is invoked which
// introduces a new segment.
EventKindBatchIntroductionStart
// EventKindBatchIntroduction is fired when Batch() completes.
EventKindBatchIntroduction
// EventKindMergeTaskIntroductionStart is fired when the merger is about to
// start the introduction of merged segment from a single merge task.
EventKindMergeTaskIntroductionStart
// EventKindMergeTaskIntroduction is fired when the merger has completed
// the introduction of merged segment from a single merge task.
EventKindMergeTaskIntroduction
// EventKindPreMergeCheck is fired before the merge begins to check if
// the caller should proceed with the merge.
EventKindPreMergeCheck
// EventKindIndexStart is fired when Index() is invoked which
// creates a new Document object from an interface using the index mapping.
EventKindIndexStart
// EventKindPurgerCheck is fired before the purge code is invoked and decides
// whether to execute or not. It exists for unit test purposes.
EventKindPurgerCheck
)
================================================
FILE: index/scorch/event_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// TestEventBatchIntroductionStart registers an event callback, indexes one
// document and verifies that exactly one EventKindBatchIntroductionStart
// event was delivered to the callback.
func TestEventBatchIntroductionStart(t *testing.T) {
	testConfig := CreateConfig("TestEventBatchIntroductionStart")
	err := InitTest(testConfig)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(testConfig)
		if err != nil {
			t.Fatal(err)
		}
	}()

	var count int
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindBatchIntroductionStart {
			count++
		}
		return true
	}
	// the registry is process-global: unregister so the callback cannot be
	// picked up by tests that run later. Deferred before the index is
	// created, so it runs only after the index has been closed.
	defer delete(RegistryEventCallbacks, "test")

	// copy the config so the shared test config is left untouched
	ourConfig := make(map[string]interface{}, len(testConfig))
	for k, v := range testConfig {
		ourConfig[k] = v
	}
	ourConfig["eventCallbackName"] = "test"

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, ourConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	if count != 1 {
		t.Fatalf("expected to see 1 batch introduction event, saw %d", count)
	}
}
================================================
FILE: index/scorch/field_dict_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// TestIndexFieldDict indexes two documents and checks the terms returned by
// the reader's FieldDict, FieldDictRange and FieldDictPrefix iterators.
func TestIndexFieldDict(t *testing.T) {
	cfg := CreateConfig("TestIndexFieldDict")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		cerr := idx.Close()
		if cerr != nil {
			t.Fatal(cerr)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// Nothing below can run without a reader; continuing would only
		// dereference a nil value.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// checkTerms drains dict and compares the terms, in iteration order,
	// against expectedTerms. An error returned by Next is reported instead of
	// being silently treated as the end of the dictionary.
	checkTerms := func(dict index.FieldDict, expectedTerms []string) {
		terms := make([]string, 0, len(expectedTerms))
		curr, err := dict.Next()
		for err == nil && curr != nil {
			terms = append(terms, curr.Term)
			curr, err = dict.Next()
		}
		if err != nil {
			t.Errorf("error iterating field dictionary: %v", err)
		}
		if len(terms) != len(expectedTerms) {
			t.Errorf("expected %d terms for this field, got %d", len(expectedTerms), len(terms))
		}
		if !reflect.DeepEqual(expectedTerms, terms) {
			t.Errorf("expected %#v, got %#v", expectedTerms, terms)
		}
	}

	dict, err := indexReader.FieldDict("name")
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict, []string{"test"})

	dict2, err := indexReader.FieldDict("desc")
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict2.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict2, []string{"eat", "more", "rice"})

	// test start and end range
	dict3, err := indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice"))
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict3.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict3, []string{"more"})

	// test use case for prefix
	dict4, err := indexReader.FieldDictPrefix("prefix", []byte("cat"))
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict4.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict4, []string{"cat", "cats", "catting"})
}
================================================
FILE: index/scorch/int.go
================================================
// Copyright 2014 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
// This code originated from:
// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding.go
// Modified to not use pkg/errors
package scorch
import "fmt"
// Tag-byte layout used by the uvarint encoding in this file. The first byte of
// an encoded value is a tag in [intZero, intMax]: tags up to intZero+intSmall
// carry the value itself, the remaining intMaxWidth tags announce how many
// payload bytes follow.
const (
	// intMaxWidth is the largest number of payload bytes (a full uint64).
	intMaxWidth = 8

	// intMin is chosen such that the range of int tags does not overlap the
	// ascii character set that is frequently used in testing.
	intMin = 0x80 // 128

	// intMax is the maximum int tag value.
	intMax = 0xfd // 253

	// intZero is the tag that encodes the value 0.
	intZero = intMin + intMaxWidth // 136

	// intSmall is the largest value that is stored directly in the tag byte.
	intSmall = intMax - intZero - intMaxWidth // 109
)
// encodeUvarintAscending appends a variable-length encoding of v to b and
// returns the extended buffer.
//
// Values up to intSmall are stored in a single tag byte (intZero+v). Larger
// values are stored as a tag byte followed by the minimal big-endian
// representation of v: the tag is intMax-8+width, where width (1 to 8) is the
// number of payload bytes.
func encodeUvarintAscending(b []byte, v uint64) []byte {
	if v <= intSmall {
		return append(b, intZero+byte(v))
	}

	// Count the bytes needed to hold v without leading zero bytes.
	width := 1
	for rest := v >> 8; rest != 0; rest >>= 8 {
		width++
	}

	// One tag byte plus up to 8 payload bytes, filled from the least
	// significant byte backwards.
	var enc [9]byte
	enc[0] = byte(intMax - 8 + width)
	for i := width; i >= 1; i-- {
		enc[i] = byte(v)
		v >>= 8
	}
	return append(b, enc[:width+1]...)
}
// decodeUvarintAscending decodes a uint64 written by encodeUvarintAscending
// from the front of b. It returns the remainder of the buffer and the decoded
// value.
//
// An error is returned when b is empty, when the leading tag byte is outside
// the range used for unsigned values (below intZero, or announcing more than 8
// payload bytes), or when fewer payload bytes are available than the tag
// announces.
func decodeUvarintAscending(b []byte) ([]byte, uint64, error) {
	if len(b) == 0 {
		return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value")
	}
	tag := int(b[0])
	// Tags below intZero are never produced by the encoder. Without this
	// check the negative difference would be converted to a huge uint64 and
	// returned as if it were a valid value.
	if tag < intZero {
		return nil, 0, fmt.Errorf("invalid uvarint tag byte 0x%02x", b[0])
	}
	length := tag - intZero
	b = b[1:] // skip tag byte
	if length <= intSmall {
		// Small values are carried by the tag itself.
		return b, uint64(length), nil
	}
	// The tag announces the number of payload bytes that follow.
	length -= intSmall
	if length > 8 {
		return nil, 0, fmt.Errorf("invalid uvarint length of %d", length)
	} else if len(b) < length {
		return nil, 0, fmt.Errorf("insufficient bytes to decode uvarint value: %q", b)
	}
	var v uint64
	// It is faster to range over the elements in a slice than to index
	// into the slice on each loop iteration.
	for _, t := range b[:length] {
		v = (v << 8) | uint64(t)
	}
	return b[length:], v, nil
}
================================================
FILE: index/scorch/int_test.go
================================================
// Copyright 2014 The Cockroach Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
// implied. See the License for the specific language governing
// permissions and limitations under the License.
// This code originated from:
// https://github.com/cockroachdb/cockroach/blob/2dd65dde5d90c157f4b93f92502ca1063b904e1d/pkg/util/encoding/encoding_test.go
// Modified to only test the parts we borrowed
package scorch
import (
"bytes"
"math"
"testing"
)
// testCaseUint64 pairs an input value with the exact bytes an encoder is
// expected to produce for it.
type testCaseUint64 struct {
	// value is the number handed to the encoder.
	value uint64
	// expEnc is the expected encoded form of value.
	expEnc []byte
}
// TestEncodeDecodeUvarint runs the generic round-trip/ordering checks against
// the ascending uvarint codec and then pins the exact encoding of a few
// boundary values.
func TestEncodeDecodeUvarint(t *testing.T) {
	// Round trip and ascending byte order.
	testBasicEncodeDecodeUint64(encodeUvarintAscending, decodeUvarintAscending, false, t)

	// Exact byte layout around the single-byte / multi-byte boundaries.
	pinned := []testCaseUint64{
		{value: 0, expEnc: []byte{0x88}},
		{value: 1, expEnc: []byte{0x89}},
		{value: 109, expEnc: []byte{0xf5}},
		{value: 110, expEnc: []byte{0xf6, 0x6e}},
		{value: 1 << 8, expEnc: []byte{0xf7, 0x01, 0x00}},
		{
			value:  math.MaxUint64,
			expEnc: []byte{0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff},
		},
	}
	testCustomEncodeUint64(pinned, encodeUvarintAscending, t)
}
// testBasicEncodeDecodeUint64 exercises an encoder/decoder pair over values
// that sit on every byte-width boundary of a uint64.
//
// For each value it checks that decFunc(encFunc(nil, v)) yields v with no
// leftover bytes, and that the encodings of the strictly increasing inputs
// compare strictly increasing (descending == false) or strictly decreasing
// (descending == true) under bytes.Compare.
func testBasicEncodeDecodeUint64(
	encFunc func([]byte, uint64) []byte,
	decFunc func([]byte) ([]byte, uint64, error),
	descending bool, t *testing.T,
) {
	t.Helper()
	testCases := []uint64{
		0, 1,
		1<<8 - 1, 1 << 8,
		1<<16 - 1, 1 << 16,
		1<<24 - 1, 1 << 24,
		1<<32 - 1, 1 << 32,
		1<<40 - 1, 1 << 40,
		1<<48 - 1, 1 << 48,
		1<<56 - 1, 1 << 56,
		math.MaxUint64 - 1, math.MaxUint64,
	}
	var lastEnc []byte
	for i, v := range testCases {
		enc := encFunc(nil, v)
		if i > 0 {
			// The inputs are distinct, so equal encodings violate the
			// ordering in either direction.
			cmp := bytes.Compare(enc, lastEnc)
			if (descending && cmp >= 0) || (!descending && cmp <= 0) {
				t.Errorf("ordered constraint violated for %d: [% x] vs. [% x]", v, enc, lastEnc)
			}
		}
		// Record the encoding before decoding so that a decode failure does
		// not leave a stale predecessor for the next ordering check.
		lastEnc = enc

		b, decode, err := decFunc(enc)
		if err != nil {
			t.Error(err)
			continue
		}
		if len(b) != 0 {
			t.Errorf("leftover bytes: [% x]", b)
		}
		if decode != v {
			t.Errorf("decode yielded different value than input: %d vs. %d", decode, v)
		}
	}
}
// testCustomEncodeUint64 encodes every case's value with encFunc (into a nil
// buffer) and reports a test error for each case whose output differs from
// the expected bytes.
func testCustomEncodeUint64(
	testCases []testCaseUint64, encFunc func([]byte, uint64) []byte, t *testing.T,
) {
	for i := range testCases {
		input, want := testCases[i].value, testCases[i].expEnc
		got := encFunc(nil, input)
		if bytes.Equal(got, want) {
			continue
		}
		t.Errorf("expected [% x]; got [% x] (value: %d)", want, got, input)
	}
}
================================================
FILE: index/scorch/introducer.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"path/filepath"
"sync/atomic"
"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// introducer is the source label passed to NewScorchError for failures raised
// by the introducer loop.
const introducer = "introducer"
// segmentIntroduction is a request, handled by introduceSegment, to apply
// obsoletions and internal key changes to the root snapshot and optionally
// append a new segment to it.
type segmentIntroduction struct {
	// id is assigned to the snapshot of the newly introduced segment.
	id uint64
	// data is the segment to append; when nil no segment is added.
	data segment.Segment
	// obsoletes holds pre-computed obsoleted doc numbers keyed by the id of an
	// existing root segment. Segments without an entry have their obsoleted
	// doc numbers computed from ids.
	obsoletes map[uint64]*roaring.Bitmap
	// ids is passed to DocNumbers on every root segment that has no entry in
	// obsoletes.
	ids []string
	// internal lists internal key/value updates; a nil value deletes the key.
	internal map[string][]byte
	// stats is stored on the snapshot of the newly introduced segment.
	stats *fieldStats

	// applied receives an error if the introduction fails and is closed once
	// the request has been handled.
	applied chan error
	// persisted, when non-nil, is appended to the Scorch's rootPersisted list.
	persisted chan error
	// persistedCallback, when non-nil, is appended to the Scorch's
	// persistedCallbacks list.
	persistedCallback index.BatchCallback
}
// persistIntroduction is a request, handled by introducePersist, to swap
// segments of the root snapshot for replacement segments.
type persistIntroduction struct {
	// persisted maps the id of a root segment to the segment that replaces it.
	// Entries are deleted from the map as they are consumed.
	persisted map[uint64]segment.Segment
	// applied is closed once the new root snapshot has been swapped in.
	applied notificationChan
}
// epochWatcher asks to be notified once the root snapshot's epoch has moved
// past a given epoch.
type epochWatcher struct {
	// epoch is the epoch the watcher has already seen.
	epoch uint64
	// notifyCh is closed by the introducer loop when the root epoch is
	// greater than epoch.
	notifyCh notificationChan
}
// introducerLoop serves introduction requests until s.closeCh is closed.
//
// Each iteration handles one of: a new epoch watcher, a merge introduction, a
// segment introduction or a persist introduction. Afterwards every registered
// watcher whose epoch is older than the current root epoch is notified by
// closing its channel. A failed segment introduction skips that notification
// pass. A panic inside the loop is reported through fireAsyncError, and
// s.asyncTasks.Done is always called on exit.
func (s *Scorch) introducerLoop() {
	defer func() {
		if r := recover(); r != nil {
			s.fireAsyncError(NewScorchError(
				introducer,
				fmt.Sprintf("panic: %v, path: %s", r, s.path),
				ErrAsyncPanic,
			))
		}

		s.asyncTasks.Done()
	}()

	// Watchers that have not been notified yet.
	var pending []*epochWatcher

	for {
		atomic.AddUint64(&s.stats.TotIntroduceLoop, 1)

		select {
		case <-s.closeCh:
			return

		case watcher := <-s.introducerNotifier:
			pending = append(pending, watcher)

		case mergeReq := <-s.merges:
			s.introduceMerge(mergeReq)

		case segReq := <-s.introductions:
			if err := s.introduceSegment(segReq); err != nil {
				continue
			}

		case persistReq := <-s.persists:
			s.introducePersist(persistReq)
		}

		var rootEpoch uint64
		s.rootLock.RLock()
		if s.root != nil {
			rootEpoch = s.root.epoch
		}
		s.rootLock.RUnlock()

		// Notify the watchers the root has moved past; keep the others.
		var stillWaiting []*epochWatcher
		for _, watcher := range pending {
			if watcher.epoch >= rootEpoch {
				stillWaiting = append(stillWaiting, watcher)
				continue
			}
			close(watcher.notifyCh)
		}
		pending = stillWaiting
	}
}
// introduceSegment builds a new root IndexSnapshot from the current root and
// the given introduction request, then swaps it in as s.root.
//
// Every segment of the current root is carried over with the request's
// obsoletions merged into its deleted bitmap; segments left without live
// documents are dropped. If next.data is non-nil it is appended as a new
// segment. Internal key/values are copied from the old root and then updated
// from next.internal.
//
// If computing the obsoleted doc numbers of a segment fails, the error is sent
// on next.applied, next.applied is closed and the error is returned before the
// root is touched. On success next.applied is closed and nil is returned.
func (s *Scorch) introduceSegment(next *segmentIntroduction) error {
	atomic.AddUint64(&s.stats.TotIntroduceSegmentBeg, 1)
	defer atomic.AddUint64(&s.stats.TotIntroduceSegmentEnd, 1)

	// hold a reference on the current root while the new snapshot is built
	s.rootLock.RLock()
	root := s.root
	root.AddRef()
	s.rootLock.RUnlock()

	defer func() { _ = root.DecRef() }()

	nsegs := len(root.segment)

	// prepare new index snapshot
	newSnapshot := &IndexSnapshot{
		parent:   s,
		segment:  make([]*SegmentSnapshot, 0, nsegs+1),
		offsets:  make([]uint64, 0, nsegs+1),
		internal: make(map[string][]byte, len(root.internal)),
		refs:     1,
		creator:  "introduceSegment",
	}

	// iterate through current segments
	// running is the doc-number offset at which the next kept segment starts
	var running uint64
	var docsToPersistCount, memSegments, fileSegments uint64
	var droppedSegmentFiles []string
	for i := range root.segment {
		// see if optimistic work included this segment
		delta, ok := next.obsoletes[root.segment[i].id]
		if !ok {
			// no pre-computed entry: look the ids up in this segment
			var err error
			delta, err = root.segment[i].segment.DocNumbers(next.ids)
			if err != nil {
				// report the failure to the requester and drop the
				// partially built snapshot
				next.applied <- fmt.Errorf("error computing doc numbers: %v", err)
				close(next.applied)
				_ = newSnapshot.DecRef()
				return err
			}
		}

		newss := &SegmentSnapshot{
			id:         root.segment[i].id,
			segment:    root.segment[i].segment,
			stats:      root.segment[i].stats,
			cachedDocs: root.segment[i].cachedDocs,
			cachedMeta: root.segment[i].cachedMeta,
			creator:    root.segment[i].creator,
		}

		// apply new obsoletions
		if root.segment[i].deleted == nil {
			newss.deleted = delta
		} else {
			if delta.IsEmpty() {
				// nothing new: share the existing bitmap
				newss.deleted = root.segment[i].deleted
			} else {
				newss.deleted = roaring.Or(root.segment[i].deleted, delta)
			}
		}
		// normalize an empty bitmap to nil
		if newss.deleted.IsEmpty() {
			newss.deleted = nil
		}

		// update the deleted bitmap to include any nested/sub-documents as well
		// if the segment supports that
		if ns, ok := newss.segment.(segment.NestedSegment); ok {
			newss.deleted = ns.AddNestedDocuments(newss.deleted)
		}

		// check for live size before copying
		if newss.LiveSize() > 0 {
			newSnapshot.segment = append(newSnapshot.segment, newss)
			// the new snapshot takes its own reference on the segment
			root.segment[i].segment.AddRef()
			newSnapshot.offsets = append(newSnapshot.offsets, running)
			running += newss.segment.Count()
		} else if seg, ok := newss.segment.(segment.PersistedSegment); ok {
			// the segment is not carried over; remember its file name so its
			// removal ineligibility can be cleared after the swap
			droppedSegmentFiles = append(droppedSegmentFiles,
				filepath.Base(seg.Path()))
		}

		if isMemorySegment(root.segment[i]) {
			docsToPersistCount += root.segment[i].Count()
			memSegments++
		} else {
			fileSegments++
		}
	}

	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)

	// append new segment, if any, to end of the new index snapshot
	if next.data != nil {
		newSegmentSnapshot := &SegmentSnapshot{
			id:         next.id,
			segment:    next.data, // take ownership of next.data's ref-count
			stats:      next.stats,
			cachedDocs: &cachedDocs{cache: nil},
			cachedMeta: &cachedMeta{meta: nil},
			creator:    "introduceSegment",
		}
		newSnapshot.segment = append(newSnapshot.segment, newSegmentSnapshot)
		newSnapshot.offsets = append(newSnapshot.offsets, running)

		// increment numItemsIntroduced which tracks the number of items
		// queued for persistence.
		atomic.AddUint64(&s.stats.TotIntroducedItems, newSegmentSnapshot.Count())
		atomic.AddUint64(&s.stats.TotIntroducedSegmentsBatch, 1)
	}
	// copy old values
	for key, oldVal := range root.internal {
		newSnapshot.internal[key] = oldVal
	}
	// set new values and apply deletes
	for key, newVal := range next.internal {
		if newVal != nil {
			newSnapshot.internal[key] = newVal
		} else {
			delete(newSnapshot.internal, key)
		}
	}

	newSnapshot.updateSize()
	s.rootLock.Lock()
	// register the requester's persistence notifications under the root lock
	if next.persisted != nil {
		s.rootPersisted = append(s.rootPersisted, next.persisted)
	}
	if next.persistedCallback != nil {
		s.persistedCallbacks = append(s.persistedCallbacks, next.persistedCallback)
	}
	// swap in new index snapshot
	newSnapshot.epoch = s.nextSnapshotEpoch
	s.nextSnapshotEpoch++
	rootPrev := s.root
	s.root = newSnapshot
	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
	// release lock
	s.rootLock.Unlock()

	// drop the reference the Scorch held on the previous root
	if rootPrev != nil {
		_ = rootPrev.DecRef()
	}

	// update the removal eligibility for those segment files
	// that are not a part of the latest root.
	for _, filename := range droppedSegmentFiles {
		s.unmarkIneligibleForRemoval(filename)
	}

	// signal success to the requester
	close(next.applied)

	return nil
}
// introducePersist builds a new root IndexSnapshot in which every root segment
// that has an entry in persist.persisted is replaced by that entry, swaps the
// snapshot in as s.root and closes persist.applied.
//
// Segments without a replacement are carried over unchanged; offsets and
// internal key/values are copied from the current root.
func (s *Scorch) introducePersist(persist *persistIntroduction) {
	atomic.AddUint64(&s.stats.TotIntroducePersistBeg, 1)
	defer atomic.AddUint64(&s.stats.TotIntroducePersistEnd, 1)

	// take a reference on the current root and reserve the next epoch for the
	// snapshot built below
	s.rootLock.Lock()
	root := s.root
	root.AddRef()
	nextSnapshotEpoch := s.nextSnapshotEpoch
	s.nextSnapshotEpoch++
	s.rootLock.Unlock()

	defer func() { _ = root.DecRef() }()

	newIndexSnapshot := &IndexSnapshot{
		parent:   s,
		epoch:    nextSnapshotEpoch,
		segment:  make([]*SegmentSnapshot, len(root.segment)),
		offsets:  make([]uint64, len(root.offsets)),
		internal: make(map[string][]byte, len(root.internal)),
		refs:     1,
		creator:  "introducePersist",
	}

	var docsToPersistCount, memSegments, fileSegments uint64
	for i, segmentSnapshot := range root.segment {
		// see if this segment has been replaced
		if replacement, ok := persist.persisted[segmentSnapshot.id]; ok {
			// same id, deletions, stats and caches; only the underlying
			// segment changes
			newSegmentSnapshot := &SegmentSnapshot{
				id:         segmentSnapshot.id,
				segment:    replacement,
				deleted:    segmentSnapshot.deleted,
				stats:      segmentSnapshot.stats,
				cachedDocs: segmentSnapshot.cachedDocs,
				cachedMeta: segmentSnapshot.cachedMeta,
				creator:    "introducePersist",
				mmaped:     1,
			}
			newIndexSnapshot.segment[i] = newSegmentSnapshot
			delete(persist.persisted, segmentSnapshot.id)

			// update items persisted in case of a new segment snapshot
			atomic.AddUint64(&s.stats.TotPersistedItems, newSegmentSnapshot.Count())
			atomic.AddUint64(&s.stats.TotPersistedSegments, 1)
			fileSegments++
		} else {
			// not replaced: reuse the existing segment snapshot and take a
			// reference on its segment for the new index snapshot
			newIndexSnapshot.segment[i] = root.segment[i]
			newIndexSnapshot.segment[i].segment.AddRef()

			if isMemorySegment(root.segment[i]) {
				docsToPersistCount += root.segment[i].Count()
				memSegments++
			} else {
				fileSegments++
			}
		}
		newIndexSnapshot.offsets[i] = root.offsets[i]
	}

	// carry over the internal key/values unchanged
	for k, v := range root.internal {
		newIndexSnapshot.internal[k] = v
	}

	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)
	newIndexSnapshot.updateSize()
	// swap in the new root
	s.rootLock.Lock()
	rootPrev := s.root
	s.root = newIndexSnapshot
	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
	s.rootLock.Unlock()

	// drop the reference the Scorch held on the previous root
	if rootPrev != nil {
		_ = rootPrev.DecRef()
	}

	// tell the requester the persisted segments are now part of the root
	close(persist.applied)
}
// introduceMerge builds a new root IndexSnapshot in which the segments listed
// in nextMerge.mergedSegHistory are replaced by the merged segments in
// nextMerge.new, and swaps it in as s.root.
//
// Documents deleted from a merged-away segment after the merge started, and
// all live documents of merged segments that are no longer part of the root,
// are marked deleted in the corresponding new segment. A new segment is only
// added when it still has at least one document that is not marked deleted.
//
// The introducer should definitely handle the segmentMerge.notifyCh
// channel before exiting the introduceMerge: a mergeTaskIntroStatus is always
// sent on it and the channel is then closed.
func (s *Scorch) introduceMerge(nextMerge *segmentMerge) {
	atomic.AddUint64(&s.stats.TotIntroduceMergeBeg, 1)
	defer atomic.AddUint64(&s.stats.TotIntroduceMergeEnd, 1)

	// hold a reference on the current root while the new snapshot is built
	s.rootLock.RLock()
	root := s.root
	root.AddRef()
	s.rootLock.RUnlock()

	defer func() { _ = root.DecRef() }()

	// note: the internal map is shared with the current root, not copied
	newSnapshot := &IndexSnapshot{
		parent:   s,
		internal: root.internal,
		refs:     1,
		creator:  "introduceMerge",
	}

	var running, docsToPersistCount, memSegments, fileSegments uint64
	var droppedSegmentFiles []string
	newSegmentDeleted := make([]*roaring.Bitmap, len(nextMerge.new))
	for i := range newSegmentDeleted {
		// create a bitmaps to track the obsoletes per newly merged segments
		newSegmentDeleted[i] = roaring.NewBitmap()
	}

	// iterate through current segments
	for i := range root.segment {
		segmentID := root.segment[i].id
		if segSnapAtMerge, ok := nextMerge.mergedSegHistory[segmentID]; ok {
			// this segment is going away, see if anything else was deleted since we started the merge
			if segSnapAtMerge != nil && root.segment[i].deleted != nil {
				// assume all these deletes are new
				deletedSince := root.segment[i].deleted
				// if we already knew about some of them, remove
				if segSnapAtMerge.oldSegment.deleted != nil {
					deletedSince = roaring.AndNot(root.segment[i].deleted, segSnapAtMerge.oldSegment.deleted)
				}
				// translate each newly deleted doc number into the merged
				// segment's numbering and mark it deleted there
				deletedSinceItr := deletedSince.Iterator()
				for deletedSinceItr.HasNext() {
					oldDocNum := deletedSinceItr.Next()
					newDocNum := segSnapAtMerge.oldNewDocIDs[oldDocNum]
					newSegmentDeleted[segSnapAtMerge.workerID].Add(uint32(newDocNum))
				}
			}

			// clean up the old segment map to figure out the
			// obsolete segments wrt root in meantime, whatever
			// segments left behind in old map after processing
			// the root segments would be the obsolete segment set
			delete(nextMerge.mergedSegHistory, segmentID)
		} else if root.segment[i].LiveSize() > 0 {
			// this segment is staying
			newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
				id:         root.segment[i].id,
				segment:    root.segment[i].segment,
				deleted:    root.segment[i].deleted,
				stats:      root.segment[i].stats,
				cachedDocs: root.segment[i].cachedDocs,
				cachedMeta: root.segment[i].cachedMeta,
				creator:    root.segment[i].creator,
			})
			// the new snapshot takes its own reference on the segment
			root.segment[i].segment.AddRef()
			newSnapshot.offsets = append(newSnapshot.offsets, running)
			running += root.segment[i].segment.Count()

			if isMemorySegment(root.segment[i]) {
				docsToPersistCount += root.segment[i].Count()
				memSegments++
			} else {
				fileSegments++
			}
		} else if root.segment[i].LiveSize() == 0 {
			// not part of the merge and nothing live left: drop it and
			// remember its file name for the eligibility update below
			if seg, ok := root.segment[i].segment.(segment.PersistedSegment); ok {
				droppedSegmentFiles = append(droppedSegmentFiles,
					filepath.Base(seg.Path()))
			}
		}
	}

	// before the newMerge introduction, need to clean the newly
	// merged segment wrt the current root segments, hence
	// applying the obsolete segment contents to newly merged segment
	for _, ss := range nextMerge.mergedSegHistory {
		obsoleted := ss.oldSegment.DocNumbersLive()
		if obsoleted != nil {
			obsoletedIter := obsoleted.Iterator()
			for obsoletedIter.HasNext() {
				oldDocNum := obsoletedIter.Next()
				newDocNum := ss.oldNewDocIDs[oldDocNum]
				newSegmentDeleted[ss.workerID].Add(uint32(newDocNum))
			}
		}
	}

	// skipped stays true when none of the merged segments is introduced
	skipped := true
	// make the newly merged segments part of the newSnapshot being constructed
	for i, newMergedSegment := range nextMerge.new {
		// checking if this newly merged segment is worth keeping based on
		// obsoleted doc count since the merge intro started
		if newMergedSegment != nil &&
			newMergedSegment.Count() > newSegmentDeleted[i].GetCardinality() {
			stats := newFieldStats()
			if fsr, ok := newMergedSegment.(segment.FieldStatsReporter); ok {
				fsr.UpdateFieldStats(stats)
			}

			// put the merged segment at the end of newSnapshot
			newSnapshot.segment = append(newSnapshot.segment, &SegmentSnapshot{
				id:         nextMerge.id[i],
				segment:    newMergedSegment, // take ownership for nextMerge.new's ref-count
				deleted:    newSegmentDeleted[i],
				stats:      stats,
				cachedDocs: &cachedDocs{cache: nil},
				cachedMeta: &cachedMeta{meta: nil},
				creator:    "introduceMerge",
				mmaped:     nextMerge.mmaped,
			})
			newSnapshot.offsets = append(newSnapshot.offsets, running)
			running += newMergedSegment.Count()

			switch newMergedSegment.(type) {
			case segment.PersistedSegment:
				fileSegments++
			default:
				// count only the documents that are not marked deleted
				docsToPersistCount += newMergedSegment.Count() - newSegmentDeleted[i].GetCardinality()
				memSegments++
			}
			skipped = false
		}
	}

	if skipped {
		atomic.AddUint64(&s.stats.TotFileMergeIntroductionsObsoleted, 1)
	} else {
		atomic.AddUint64(&s.stats.TotIntroducedSegmentsMerge, uint64(len(nextMerge.new)))
	}

	atomic.StoreUint64(&s.stats.TotItemsToPersist, docsToPersistCount)
	atomic.StoreUint64(&s.stats.TotMemorySegmentsAtRoot, memSegments)
	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, fileSegments)

	newSnapshot.AddRef() // 1 ref for the nextMerge.notify response

	newSnapshot.updateSize()

	s.rootLock.Lock()
	// swap in new index snapshot
	newSnapshot.epoch = s.nextSnapshotEpoch
	s.nextSnapshotEpoch++
	rootPrev := s.root
	s.root = newSnapshot
	atomic.StoreUint64(&s.stats.CurRootEpoch, s.root.epoch)
	// release lock
	s.rootLock.Unlock()

	// drop the reference the Scorch held on the previous root
	if rootPrev != nil {
		_ = rootPrev.DecRef()
	}

	// update the removal eligibility for those segment files
	// that are not a part of the latest root.
	for _, filename := range droppedSegmentFiles {
		s.unmarkIneligibleForRemoval(filename)
	}

	// notify requester that we incorporated this
	nextMerge.notifyCh <- &mergeTaskIntroStatus{
		indexSnapshot: newSnapshot,
		skipped:       skipped}
	close(nextMerge.notifyCh)
}
// isMemorySegment reports whether the snapshot's underlying segment does not
// implement segment.PersistedSegment.
func isMemorySegment(s *SegmentSnapshot) bool {
	_, persisted := s.segment.(segment.PersistedSegment)
	return !persisted
}
================================================
FILE: index/scorch/merge.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"context"
"fmt"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
"github.com/blevesearch/bleve/v2/util"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// merger is the source label passed to NewScorchError for failures raised by
// the merger loop.
const merger = "merger"
// mergerLoop plans and runs merges until s.closeCh is closed.
//
// A merge is planned against the current root when its epoch differs from the
// epoch of the last planned merge, or when a request arrived on
// s.forceMergeRequestCh. Between rounds the loop registers an epochWatcher
// with the persister and waits for its notification, a force-merge request or
// close. Failing to parse the merge planner options is reported through
// fireAsyncError and ends the loop immediately; a panic is reported the same
// way. s.asyncTasks.Done is always called on exit.
func (s *Scorch) mergerLoop() {
	defer func() {
		if r := recover(); r != nil {
			s.fireAsyncError(NewScorchError(
				merger,
				fmt.Sprintf("panic: %v, path: %s", r, s.path),
				ErrAsyncPanic,
			))
		}

		s.asyncTasks.Done()
	}()

	var lastEpochMergePlanned uint64
	// ctrlMsg is the merge to run in the current round; nil means none
	var ctrlMsg *mergerCtrl
	mergePlannerOptions, err := s.parseMergePlannerOptions()
	if err != nil {
		s.fireAsyncError(NewScorchError(
			merger,
			fmt.Sprintf("mergerPlannerOptions json parsing err: %v", err),
			ErrOptionsParse,
		))
		return
	}
	// default control message, used for merges triggered by a root change
	ctrlMsgDflt := &mergerCtrl{ctx: context.Background(),
		options: mergePlannerOptions,
		doneCh:  nil}

OUTER:
	for {
		atomic.AddUint64(&s.stats.TotFileMergeLoopBeg, 1)

		select {
		case <-s.closeCh:
			break OUTER

		default:
			// take a reference on the current root and publish its size and
			// epoch in the merge stats
			s.rootLock.Lock()
			ourSnapshot := s.root
			ourSnapshot.AddRef()
			atomic.StoreUint64(&s.iStats.mergeSnapshotSize, uint64(ourSnapshot.Size()))
			atomic.StoreUint64(&s.iStats.mergeEpoch, ourSnapshot.epoch)
			s.rootLock.Unlock()

			// no explicit request pending, but the root changed since the
			// last planned merge: run a default merge
			if ctrlMsg == nil && ourSnapshot.epoch != lastEpochMergePlanned {
				ctrlMsg = ctrlMsgDflt
			}
			if ctrlMsg != nil {
				continueMerge := s.fireEvent(EventKindPreMergeCheck, 0)
				// The default, if there's no handler, is to continue the merge.
				if !continueMerge {
					// If it's decided that this merge can't take place now,
					// begin the merge process all over again.
					// Retry instead of blocking/waiting here since a long wait
					// can result in more segments introduced i.e. s.root will
					// be updated.

					// decrement the ref count since its no longer needed in this
					// iteration
					_ = ourSnapshot.DecRef()
					continue OUTER
				}

				startTime := time.Now()

				// lets get started
				err := s.planMergeAtSnapshot(ctrlMsg.ctx, ctrlMsg.options,
					ourSnapshot)
				if err != nil {
					atomic.StoreUint64(&s.iStats.mergeEpoch, 0)
					if err == segment.ErrClosed {
						// index has been closed
						_ = ourSnapshot.DecRef()

						// continue the workloop on a user triggered cancel
						if ctrlMsg.doneCh != nil {
							close(ctrlMsg.doneCh)
							ctrlMsg = nil
							continue OUTER
						}

						// exit the workloop on index closure
						ctrlMsg = nil
						break OUTER
					}
					// any other error is reported; ctrlMsg is left in place,
					// so the same merge is attempted again next iteration
					s.fireAsyncError(NewScorchError(
						merger,
						fmt.Sprintf("merging err: %v", err),
						ErrPersist,
					))
					_ = ourSnapshot.DecRef()
					atomic.AddUint64(&s.stats.TotFileMergeLoopErr, 1)
					continue OUTER
				}

				// success: wake up a waiting requester, if any
				if ctrlMsg.doneCh != nil {
					close(ctrlMsg.doneCh)
				}
				ctrlMsg = nil

				lastEpochMergePlanned = ourSnapshot.epoch

				atomic.StoreUint64(&s.stats.LastMergedEpoch, ourSnapshot.epoch)

				s.fireEvent(EventKindMergerProgress, time.Since(startTime))
			}
			_ = ourSnapshot.DecRef()

			// tell the persister we're waiting for changes
			// first make a epochWatcher chan
			ew := &epochWatcher{
				epoch:    lastEpochMergePlanned,
				notifyCh: make(notificationChan, 1),
			}

			// give it to the persister
			select {
			case <-s.closeCh:
				break OUTER
			case s.persisterNotifier <- ew:
			case ctrlMsg = <-s.forceMergeRequestCh:
				// a force-merge request takes precedence over waiting
				continue OUTER
			}

			// now wait for persister (but also detect close)
			select {
			case <-s.closeCh:
				break OUTER
			case <-ew.notifyCh:
			case ctrlMsg = <-s.forceMergeRequestCh:
			}
		}

		atomic.AddUint64(&s.stats.TotFileMergeLoopEnd, 1)
	}
}
// mergerCtrl describes one merge round for the merger loop.
type mergerCtrl struct {
	// ctx is passed to planMergeAtSnapshot for the merge.
	ctx context.Context
	// options are the merge plan options used for the round.
	options *mergeplan.MergePlanOptions
	// doneCh, when non-nil, is closed by the merger loop when the round
	// finishes or stops with segment.ErrClosed. It is nil for the loop's
	// default merges.
	doneCh chan struct{}
}
// ForceMerge helps users trigger a merge operation on
// an online scorch index.
//
// mo selects the merge plan options; when nil, the single segment merge
// policy is used. The call blocks until the merger reports completion or the
// index is closed. It returns an error when mo fails validation or when
// another force merge is still in progress, and nil otherwise (including when
// the index is closed while waiting).
func (s *Scorch) ForceMerge(ctx context.Context,
	mo *mergeplan.MergePlanOptions) error {
	// Validate the options before the operation is recorded as started.
	// Otherwise a rejected request would leave the started counter ahead of
	// the completed counter and every later call would fail with
	// "force merge already in progress".
	if mo != nil {
		err := mergeplan.ValidateMergePlannerOptions(mo)
		if err != nil {
			return err
		}
	} else {
		// assume the default single segment merge policy
		mo = &mergeplan.SingleSegmentMergePlanOptions
	}

	// check whether force merge is already under processing
	// The lock serializes check-and-increment between concurrent callers. The
	// counters are accessed atomically because the completed counter is
	// updated outside the lock.
	s.rootLock.Lock()
	if atomic.LoadUint64(&s.stats.TotFileMergeForceOpsStarted) >
		atomic.LoadUint64(&s.stats.TotFileMergeForceOpsCompleted) {
		s.rootLock.Unlock()
		return fmt.Errorf("force merge already in progress")
	}

	atomic.AddUint64(&s.stats.TotFileMergeForceOpsStarted, 1)
	s.rootLock.Unlock()

	msg := &mergerCtrl{options: mo,
		doneCh: make(chan struct{}),
		ctx:    ctx,
	}

	// request the merger perform a force merge
	select {
	case s.forceMergeRequestCh <- msg:
	case <-s.closeCh:
		return nil
	}

	// wait for the force merge operation completion
	select {
	case <-msg.doneCh:
		atomic.AddUint64(&s.stats.TotFileMergeForceOpsCompleted, 1)
	case <-s.closeCh:
	}

	return nil
}
// parseMergePlannerOptions returns the merge plan options for this index.
//
// It starts from mergeplan.DefaultMergePlanOptions, sets FloorSegmentFileSize
// from the persister's MaxSizeInMemoryMergePerWorker and, when the
// "scorchMergePlanOptions" config entry is present, overlays that entry via a
// JSON round trip and validates the result.
//
// A failure to parse the persister options or to validate the result returns
// nil with the error. A JSON marshal or unmarshal failure returns the options
// built so far together with the error.
func (s *Scorch) parseMergePlannerOptions() (*mergeplan.MergePlanOptions,
	error) {
	persisterOpts, err := s.parsePersisterOptions()
	if err != nil {
		return nil, err
	}

	opts := mergeplan.DefaultMergePlanOptions
	// by default use the MaxSizeInMemoryMergePerWorker from the persister option
	// as the FloorSegmentFileSize for the merge planner which would be the
	// first tier size in the planning. If the value is 0, then we don't use the
	// file size in the planning.
	opts.FloorSegmentFileSize = int64(persisterOpts.MaxSizeInMemoryMergePerWorker)

	configured, present := s.config["scorchMergePlanOptions"]
	if !present {
		return &opts, nil
	}

	// Overlay the configured value by encoding it to JSON and decoding it on
	// top of the defaults.
	encoded, err := util.MarshalJSON(configured)
	if err != nil {
		return &opts, err
	}
	if err = util.UnmarshalJSON(encoded, &opts); err != nil {
		return &opts, err
	}
	if err = mergeplan.ValidateMergePlannerOptions(&opts); err != nil {
		return nil, err
	}
	return &opts, nil
}
// closeChWrapper folds two cancellation sources, a close channel and a
// context, into a single cancelCh.
type closeChWrapper struct {
	// ch1 is the first cancellation source; listen reacts to it being closed
	// (or receiving a value).
	ch1 chan struct{}
	// ctx is the second cancellation source; never nil after construction.
	ctx context.Context
	// closeCh is closed by close to make listen return without cancelling.
	closeCh chan struct{}
	// cancelCh is closed by listen when either source fires.
	cancelCh chan struct{}
}

// newCloseChWrapper returns a wrapper watching ch1 and ctx. A nil ctx is
// treated as context.Background(), i.e. only ch1 can trigger cancellation;
// without that substitution listen would panic in its own goroutine when
// calling Done on the nil context.
func newCloseChWrapper(ch1 chan struct{},
	ctx context.Context) *closeChWrapper {
	if ctx == nil {
		ctx = context.Background()
	}
	return &closeChWrapper{
		ch1:      ch1,
		ctx:      ctx,
		closeCh:  make(chan struct{}),
		cancelCh: make(chan struct{}),
	}
}

// close makes a running listen return without closing cancelCh. It must be
// called at most once, since it closes closeCh.
func (w *closeChWrapper) close() {
	close(w.closeCh)
}

// listen blocks until ch1 fires, ctx is done, or close is called. In the first
// two cases cancelCh is closed before returning. It is meant to be run in its
// own goroutine, once per wrapper.
func (w *closeChWrapper) listen() {
	ctxDone := w.ctx.Done()
	select {
	case <-w.ch1:
	case <-ctxDone:
	case <-w.closeCh:
		// released by close: return without signalling cancellation
		return
	}
	close(w.cancelCh)
}
// planMergeAtSnapshot asks the merge planner for a plan over the persisted
// segments of ourSnapshot and then executes the plan's tasks one at a time.
//
// For every task the non-empty segments are merged into a new zap file, the
// resulting segment is handed to the introducer over s.merges, and the call
// blocks until the introducer answers on the task's notify channel.
//
// The merge itself is cancelled when either s.closeCh is closed or ctx is
// done; both are funnelled into one cancel channel by closeChWrapper.
//
// It returns segment.ErrClosed (unwrapped) when the merge reports that error
// or when s.closeCh fires before the task is handed to the introducer. Other
// failures are returned as planning, merging or open errors.
func (s *Scorch) planMergeAtSnapshot(ctx context.Context,
	options *mergeplan.MergePlanOptions, ourSnapshot *IndexSnapshot) error {
	// build list of persisted segments in this snapshot
	var onlyPersistedSnapshots []mergeplan.Segment
	for _, segmentSnapshot := range ourSnapshot.segment {
		if _, ok := segmentSnapshot.segment.(segment.PersistedSegment); ok {
			onlyPersistedSnapshots = append(onlyPersistedSnapshots, segmentSnapshot)
		}
	}

	atomic.AddUint64(&s.stats.TotFileMergePlan, 1)

	// give this list to the planner
	resultMergePlan, err := mergeplan.Plan(onlyPersistedSnapshots, options)
	if err != nil {
		atomic.AddUint64(&s.stats.TotFileMergePlanErr, 1)
		return fmt.Errorf("merge planning err: %v", err)
	}
	if resultMergePlan == nil {
		// nothing to do
		atomic.AddUint64(&s.stats.TotFileMergePlanNone, 1)
		return nil
	}
	atomic.AddUint64(&s.stats.TotFileMergePlanOk, 1)

	atomic.AddUint64(&s.stats.TotFileMergePlanTasks, uint64(len(resultMergePlan.Tasks)))

	// process tasks in serial for now
	var filenames []string

	// cw.cancelCh is closed as soon as the index is closed or ctx is done;
	// the deferred close() releases the listener goroutine on return.
	cw := newCloseChWrapper(s.closeCh, ctx)
	defer cw.close()

	go cw.listen()

	for _, task := range resultMergePlan.Tasks {
		if len(task.Segments) == 0 {
			atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegmentsEmpty, 1)
			continue
		}

		atomic.AddUint64(&s.stats.TotFileMergePlanTasksSegments, uint64(len(task.Segments)))

		oldMap := make(map[uint64]*SegmentSnapshot, len(task.Segments))
		newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
		segmentsToMerge := make([]segment.Segment, 0, len(task.Segments))
		docsToDrop := make([]*roaring.Bitmap, 0, len(task.Segments))
		mergedSegHistory := make(map[uint64]*mergedSegmentHistory, len(task.Segments))

		for _, planSegment := range task.Segments {
			if segSnapshot, ok := planSegment.(*SegmentSnapshot); ok {
				oldMap[segSnapshot.id] = segSnapshot
				mergedSegHistory[segSnapshot.id] = &mergedSegmentHistory{
					workerID:   0,
					oldSegment: segSnapshot,
				}
				if persistedSeg, ok := segSnapshot.segment.(segment.PersistedSegment); ok {
					if segSnapshot.LiveSize() == 0 {
						// fully deleted segment: nothing to merge, and it
						// gets no history entry.
						atomic.AddUint64(&s.stats.TotFileMergeSegmentsEmpty, 1)
						oldMap[segSnapshot.id] = nil
						delete(mergedSegHistory, segSnapshot.id)
					} else {
						segmentsToMerge = append(segmentsToMerge, segSnapshot.segment)
						docsToDrop = append(docsToDrop, segSnapshot.deleted)
					}
					// track the files getting merged for unsetting the
					// removal ineligibility. This helps to unflip files
					// even with fast merger, slow persister work flows.
					path := persistedSeg.Path()
					filenames = append(filenames,
						strings.TrimPrefix(path, s.path+string(os.PathSeparator)))
				}
			}
		}

		// seg and filename stay at their zero values when every segment of
		// the task was empty and no merge is performed.
		var seg segment.Segment
		var filename string
		if len(segmentsToMerge) > 0 {
			filename = zapFileName(newSegmentID)
			s.markIneligibleForRemoval(filename)
			path := s.path + string(os.PathSeparator) + filename

			fileMergeZapStartTime := time.Now()

			atomic.AddUint64(&s.stats.TotFileMergeZapBeg, 1)
			prevBytesReadTotal := cumulateBytesRead(segmentsToMerge)
			newDocNums, _, err := s.segPlugin.MergeUsing(segmentsToMerge, docsToDrop, path,
				cw.cancelCh, s, s.segmentConfig)
			atomic.AddUint64(&s.stats.TotFileMergeZapEnd, 1)

			fileMergeZapTime := uint64(time.Since(fileMergeZapStartTime))
			atomic.AddUint64(&s.stats.TotFileMergeZapTime, fileMergeZapTime)
			if atomic.LoadUint64(&s.stats.MaxFileMergeZapTime) < fileMergeZapTime {
				atomic.StoreUint64(&s.stats.MaxFileMergeZapTime, fileMergeZapTime)
			}

			if err != nil {
				s.unmarkIneligibleForRemoval(filename)
				atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
				if err == segment.ErrClosed {
					return err
				}
				return fmt.Errorf("merging failed: %v", err)
			}

			seg, err = s.segPlugin.OpenUsing(path, s.segmentConfig)
			if err != nil {
				s.unmarkIneligibleForRemoval(filename)
				atomic.AddUint64(&s.stats.TotFileMergePlanTasksErr, 1)
				return err
			}

			// carry the bytes-read accounting of the merged inputs over to
			// the new segment.
			totalBytesRead := seg.BytesRead() + prevBytesReadTotal
			seg.ResetBytesRead(totalBytesRead)

			for i, segNewDocNums := range newDocNums {
				if mergedSegHistory[task.Segments[i].Id()] != nil {
					mergedSegHistory[task.Segments[i].Id()].oldNewDocIDs = segNewDocNums
				}
			}

			atomic.AddUint64(&s.stats.TotFileMergeSegments, uint64(len(segmentsToMerge)))
		}

		// seg is a nil interface when the task had nothing to merge; calling
		// a method on it would panic, so only query the count when a merged
		// segment actually exists.
		var newCount uint64
		if seg != nil {
			newCount = seg.Count()
		}

		sm := &segmentMerge{
			id:               []uint64{newSegmentID},
			mergedSegHistory: mergedSegHistory,
			new:              []segment.Segment{seg},
			newCount:         newCount,
			notifyCh:         make(chan *mergeTaskIntroStatus),
			mmaped:           1,
		}

		s.fireEvent(EventKindMergeTaskIntroductionStart, 0)

		// give it to the introducer
		select {
		case <-s.closeCh:
			if seg != nil {
				_ = seg.Close()
			}
			return segment.ErrClosed
		case s.merges <- sm:
			atomic.AddUint64(&s.stats.TotFileMergeIntroductions, 1)
		}

		introStartTime := time.Now()
		// it is safe to blockingly wait for the merge introduction
		// here as the introducer is bound to handle the notify channel.
		introStatus := <-sm.notifyCh
		introTime := uint64(time.Since(introStartTime))
		atomic.AddUint64(&s.stats.TotFileMergeZapIntroductionTime, introTime)
		if atomic.LoadUint64(&s.stats.MaxFileMergeZapIntroductionTime) < introTime {
			atomic.StoreUint64(&s.stats.MaxFileMergeZapIntroductionTime, introTime)
		}
		atomic.AddUint64(&s.stats.TotFileMergeIntroductionsDone, 1)
		if introStatus != nil && introStatus.indexSnapshot != nil {
			_ = introStatus.indexSnapshot.DecRef()
			if introStatus.skipped {
				// close the segment on skipping introduction.
				s.unmarkIneligibleForRemoval(filename)
				if seg != nil {
					_ = seg.Close()
				}
			}
		}

		atomic.AddUint64(&s.stats.TotFileMergePlanTasksDone, 1)

		s.fireEvent(EventKindMergeTaskIntroduction, 0)
	}

	// once all the newly merged segment introductions are done,
	// its safe to unflip the removal ineligibility for the replaced
	// older segments
	for _, f := range filenames {
		s.unmarkIneligibleForRemoval(f)
	}

	return nil
}
// mergeTaskIntroStatus is the value received on a segmentMerge's notifyCh
// once the merge introduction has been handled.
type mergeTaskIntroStatus struct {
// indexSnapshot is the snapshot handed back with the status; the receiver
// either DecRef()s it or returns it to its own caller.
indexSnapshot *IndexSnapshot
// skipped is true when the introduction was skipped, in which case the
// receiver closes/releases the newly merged segment(s) itself.
skipped bool
}
// mergedSegmentHistory records, for one old segment taking part in a merge,
// which worker merged it and how its documents were renumbered.
//
// this is important when it comes to introducing multiple merged segments in a
// single introducer channel push. That way there is a check to ensure that the
// file count doesn't explode during the index's lifetime.
type mergedSegmentHistory struct {
// workerID is the index of the worker (and hence of the new segment in
// segmentMerge.new) that merged this old segment; file merges use 0.
workerID uint64
// oldNewDocIDs holds the doc numbers returned by the merge for this old
// segment.
oldNewDocIDs []uint64
// oldSegment is the snapshot of the segment that was merged away.
oldSegment *SegmentSnapshot
}
// segmentMerge describes the outcome of one merge operation that is sent over
// s.merges for introduction.
type segmentMerge struct {
// id holds the segment IDs allocated for the newly merged segments.
id []uint64
// new holds the newly merged segments, index-aligned with id.
new []segment.Segment
// mergedSegHistory maps an old segment snapshot's id to its merge history.
mergedSegHistory map[uint64]*mergedSegmentHistory
// notifyCh receives the introduction status; the sender of the
// segmentMerge blocks on it.
notifyCh chan *mergeTaskIntroStatus
// mmaped is set to 1 by the file merger and left at 0 by the in-memory
// merge path. NOTE(review): its use on the receiving side is not visible
// here -- confirm.
mmaped uint32
// newCount is the document count reported by the new segment(s).
newCount uint64
}
// cumulateBytesRead returns the sum of BytesRead() over all given segments.
func cumulateBytesRead(sbs []segment.Segment) uint64 {
	total := uint64(0)
	for i := range sbs {
		total += sbs[i].BytesRead()
	}
	return total
}
// closeNewMergedSegments drops one reference from every non-nil segment in
// segs. DecRef errors are deliberately ignored (best-effort cleanup), so the
// returned error is always nil.
func closeNewMergedSegments(segs []segment.Segment) error {
	for i := range segs {
		if segs[i] == nil {
			continue
		}
		_ = segs[i].DecRef()
	}
	return nil
}
// mergeAndPersistInMemorySegments takes an IndexSnapshot and a list of in-memory segments,
// which are merged and persisted to disk concurrently. These are then introduced as
// the new root snapshot in one-shot.
//
// One goroutine is started per entry of flushableObjs; each merges its batch
// into a new zap file and opens the result. When any worker fails, all
// segments opened so far are released and an error is returned:
// segment.ErrClosed as-is if any worker reported it, otherwise the first
// error wrapped together with the messages of the remaining ones.
//
// On success it returns the snapshot delivered by the introducer and the IDs
// of the new segments. If the introduction was skipped, the snapshot is
// released, the new segments are closed and a nil snapshot is returned
// alongside the IDs.
func (s *Scorch) mergeAndPersistInMemorySegments(snapshot *IndexSnapshot,
	flushableObjs []*flushable) (*IndexSnapshot, []uint64, error) {
	atomic.AddUint64(&s.stats.TotMemMergeBeg, 1)

	memMergeZapStartTime := time.Now()

	atomic.AddUint64(&s.stats.TotMemMergeZapBeg, 1)

	var wg sync.WaitGroup
	// we're tracking the merged segments and their doc number per worker
	// to be able to introduce them all at once, so the first dimension of the
	// slices here correspond to workerID
	newDocIDsSet := make([][][]uint64, len(flushableObjs))
	newMergedSegments := make([]segment.Segment, len(flushableObjs))
	newMergedSegmentIDs := make([]uint64, len(flushableObjs))
	numFlushes := len(flushableObjs)
	var numSegments, newMergedCount uint64
	// em guards errs, which is appended to by the workers.
	var em sync.Mutex
	var errs []error

	// deploy the workers to merge and flush the batches of segments concurrently
	// and create a new file segment
	for i := 0; i < numFlushes; i++ {
		wg.Add(1)
		go func(segsBatch []segment.Segment, dropsBatch []*roaring.Bitmap, id int) {
			defer wg.Done()
			newSegmentID := atomic.AddUint64(&s.nextSegmentID, 1)
			filename := zapFileName(newSegmentID)
			path := s.path + string(os.PathSeparator) + filename

			// the newly merged segment is already flushed out to disk, just needs
			// to be opened using mmap.
			newDocIDs, _, err :=
				s.segPlugin.MergeUsing(segsBatch, dropsBatch, path, s.closeCh, s, s.segmentConfig)
			if err != nil {
				em.Lock()
				errs = append(errs, err)
				em.Unlock()
				atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
				return
			}
			// to prevent accidental cleanup of this newly created file, mark it
			// as ineligible for removal. this will be flipped back when the bolt
			// is updated - which is valid, since the snapshot updated in bolt is
			// cleaned up only if its zero ref'd (MB-66163 for more details)
			s.markIneligibleForRemoval(filename)
			newMergedSegmentIDs[id] = newSegmentID
			newDocIDsSet[id] = newDocIDs
			newMergedSegments[id], err = s.segPlugin.OpenUsing(path, s.segmentConfig)
			if err != nil {
				em.Lock()
				errs = append(errs, err)
				em.Unlock()
				atomic.AddUint64(&s.stats.TotMemMergeErr, 1)
				return
			}
			atomic.AddUint64(&newMergedCount, newMergedSegments[id].Count())
			atomic.AddUint64(&numSegments, uint64(len(segsBatch)))
		}(flushableObjs[i].segments, flushableObjs[i].drops, i)
	}
	wg.Wait()

	if errs != nil {
		// close the new merged segments
		_ = closeNewMergedSegments(newMergedSegments)
		var errf error
		for _, err := range errs {
			if err == segment.ErrClosed {
				// the index snapshot was closed which will be handled gracefully
				// by retrying the whole merge+flush operation in a later iteration
				// so its safe to early exit the same error.
				return nil, nil, err
			}
			if errf == nil {
				// seed the aggregate with the first error itself; wrapping
				// a nil error with %w would yield a "%!w(<nil>)" prefix and
				// an error that unwraps to nothing.
				errf = err
				continue
			}
			errf = fmt.Errorf("%w; %v", errf, err)
		}
		return nil, nil, errf
	}

	atomic.AddUint64(&s.stats.TotMemMergeZapEnd, 1)

	memMergeZapTime := uint64(time.Since(memMergeZapStartTime))
	atomic.AddUint64(&s.stats.TotMemMergeZapTime, memMergeZapTime)
	if atomic.LoadUint64(&s.stats.MaxMemMergeZapTime) < memMergeZapTime {
		atomic.StoreUint64(&s.stats.MaxMemMergeZapTime, memMergeZapTime)
	}

	// update the segmentMerge task with the newly merged + flushed segments which
	// are to be introduced atomically.
	sm := &segmentMerge{
		id:               newMergedSegmentIDs,
		new:              newMergedSegments,
		mergedSegHistory: make(map[uint64]*mergedSegmentHistory, numSegments),
		notifyCh:         make(chan *mergeTaskIntroStatus),
		newCount:         newMergedCount,
	}

	// create a history map which maps the old in-memory segments with the specific
	// persister worker (also the specific file segment its going to be part of)
	// which flushed it out. This map will be used on the introducer side to out-ref
	// the in-memory segments and also track the new tombstones if present.
	for i, flushable := range flushableObjs {
		for j, idx := range flushable.sbIdxs {
			ss := snapshot.segment[idx]
			// oldSegmentSnapshot.id -> {workerID, oldSegmentSnapshot, docIDs}
			sm.mergedSegHistory[ss.id] = &mergedSegmentHistory{
				workerID:     uint64(i),
				oldNewDocIDs: newDocIDsSet[i][j],
				oldSegment:   ss,
			}
		}
	}

	select { // send to introducer
	case <-s.closeCh:
		_ = closeNewMergedSegments(newMergedSegments)
		return nil, nil, segment.ErrClosed
	case s.merges <- sm:
	}

	// blockingly wait for the introduction to complete
	var newSnapshot *IndexSnapshot
	introStatus := <-sm.notifyCh
	if introStatus != nil && introStatus.indexSnapshot != nil {
		newSnapshot = introStatus.indexSnapshot
		atomic.AddUint64(&s.stats.TotMemMergeSegments, uint64(numSegments))
		atomic.AddUint64(&s.stats.TotMemMergeDone, 1)
		if introStatus.skipped {
			// close the segment on skipping introduction.
			_ = newSnapshot.DecRef()
			_ = closeNewMergedSegments(newMergedSegments)
			newSnapshot = nil
		}
	}

	return newSnapshot, newMergedSegmentIDs, nil
}
// ReportBytesWritten atomically adds bytesWritten to the
// TotFileMergeWrittenBytes stat.
func (s *Scorch) ReportBytesWritten(bytesWritten uint64) {
	counter := &s.stats.TotFileMergeWrittenBytes
	atomic.AddUint64(counter, bytesWritten)
}
================================================
FILE: index/scorch/merge_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"sync"
"sync/atomic"
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// TestObsoleteSegmentMergeIntroduction verifies that a merged segment whose
// contents were all deleted while it was waiting to be introduced is counted
// as obsoleted (TotFileMergeIntroductionsObsoleted) and that the root ends up
// with a single segment holding the one remaining document.
//
// The ordering is driven through the "test" event callback: the merger is
// paused at EventKindMergeTaskIntroductionStart until a third batch, which
// deletes the merged documents, has been introduced.
func TestObsoleteSegmentMergeIntroduction(t *testing.T) {
testConfig := CreateConfig("TestObsoleteSegmentMergeIntroduction")
err := InitTest(testConfig)
if err != nil {
t.Fatal(err)
}
defer func() {
err := DestroyTest(testConfig)
if err != nil {
t.Fatal(err)
}
}()
// introComplete:      released once three batches have been introduced.
// mergeIntroStart:    released when the merger starts its introduction.
// mergeIntroComplete: released when the merge introduction has finished.
var introComplete, mergeIntroStart, mergeIntroComplete sync.WaitGroup
introComplete.Add(1)
mergeIntroStart.Add(1)
mergeIntroComplete.Add(1)
var segIntroCompleted int
RegistryEventCallbacks["test"] = func(e Event) bool {
switch e.Kind {
case EventKindBatchIntroduction:
segIntroCompleted++
if segIntroCompleted == 3 {
// all 3 segments introduced
introComplete.Done()
}
case EventKindMergeTaskIntroductionStart:
// signal the start of merge task introduction so that
// we can introduce a new batch which obsoletes the
// merged segment's contents.
mergeIntroStart.Done()
// hold the merge task introduction until the merged segment contents
// are obsoleted with the next batch/segment introduction.
introComplete.Wait()
case EventKindMergeTaskIntroduction:
// signal the completion of the merge task introduction.
mergeIntroComplete.Done()
}
return true
}
// copy the shared test config and register the callback above on the copy.
ourConfig := make(map[string]interface{}, len(testConfig))
for k, v := range testConfig {
ourConfig[k] = v
}
ourConfig["eventCallbackName"] = "test"
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, ourConfig, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
}
}()
// first introduce two documents over two batches.
batch := index.NewBatch()
doc := document.NewDocument("1")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
batch.Update(doc)
err = idx.Batch(batch)
if err != nil {
t.Error(err)
}
batch.Reset()
doc = document.NewDocument("2")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated")))
batch.Update(doc)
err = idx.Batch(batch)
if err != nil {
t.Error(err)
}
// wait until the merger trying to introduce the new merged segment.
mergeIntroStart.Wait()
// execute another batch which obsoletes the contents of the new merged
// segment awaiting introduction.
batch.Reset()
batch.Delete("1")
batch.Delete("2")
doc = document.NewDocument("3")
doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3updated")))
batch.Update(doc)
err = idx.Batch(batch)
if err != nil {
t.Error(err)
}
// wait until the merge task introduction complete.
mergeIntroComplete.Wait()
idxr, err := idx.Reader()
if err != nil {
t.Error(err)
}
// only the segment from the third batch is expected to remain at the root.
numSegments := len(idxr.(*IndexSnapshot).segment)
if numSegments != 1 {
t.Errorf("expected one segment at the root, got: %d", numSegments)
}
skipIntroCount := atomic.LoadUint64(&idxr.(*IndexSnapshot).parent.stats.TotFileMergeIntroductionsObsoleted)
if skipIntroCount != 1 {
t.Errorf("expected one obsolete merge segment skipping the introduction, got: %d", skipIntroCount)
}
// documents "1" and "2" were deleted, so only document "3" is left.
docCount, err := idxr.DocCount()
if err != nil {
t.Fatal(err)
}
if docCount != 1 {
t.Errorf("Expected document count to be %d got %d", 1, docCount)
}
err = idxr.Close()
if err != nil {
t.Fatal(err)
}
}
================================================
FILE: index/scorch/mergeplan/merge_plan.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package mergeplan provides a segment merge planning approach that's
// inspired by Lucene's TieredMergePolicy.java and descriptions like
// http://blog.mikemccandless.com/2011/02/visualizing-lucenes-segment-merges.html
package mergeplan
import (
"errors"
"fmt"
"math"
"sort"
"strings"
)
// A Segment represents the information that the planner needs to
// calculate segment merging.
type Segment interface {
// Unique id of the segment -- used for sorting.
Id() uint64
// Full segment size (the size before any logical deletions).
FullSize() int64
// Size of the live data of the segment; i.e., FullSize() minus
// any logical deletions.
LiveSize() int64
// Reports whether the segment contains vectors. For such segments the
// planner additionally applies the MaxSegmentFileSize limit.
HasVector() bool
// Size of the persisted segment file.
FileSize() int64
}
// Plan computes a merge plan for the given segments without modifying
// them. Every segment ends up in at most one MergeTask of the returned
// MergePlan; a segment that appears in no MergeTask should be left
// unmerged.
func Plan(segments []Segment, o *MergePlanOptions) (*MergePlan, error) {
	result, err := plan(segments, o)
	return result, err
}
// A MergePlan is the result of the Plan() API.
//
// The planner doesn’t know how or whether these tasks are executed --
// that’s up to a separate merge execution system, which might execute
// these tasks concurrently or not, and which might execute all the
// tasks or not.
type MergePlan struct {
// Tasks lists the merges suggested by the planner.
Tasks []*MergeTask
}
// A MergeTask represents several segments that should be merged
// together into a single segment.
type MergeTask struct {
// Segments are the inputs of this merge.
Segments []Segment
}
// The MergePlanOptions is designed to be reusable between planning calls.
type MergePlanOptions struct {
// Max # segments per logarithmic tier, or max width of any
// logarithmic “step”. Smaller values mean more merging but fewer
// segments. Should be >= SegmentsPerMergeTask, else you'll have
// too much merging.
MaxSegmentsPerTier int
// Max size of any segment produced after merging. Actual
// merging, however, may produce segment sizes different than the
// planner’s predicted sizes.
MaxSegmentSize int64
// Max size (in bytes) of the persisted segment file that contains the
// vectors. This is used to prevent merging of segments that
// contain vectors that are too large. It is only consulted for
// segments reporting HasVector(); a value <= 0 disables the
// eligibility check on file size.
MaxSegmentFileSize int64
// The growth factor for each tier in a staircase of idealized
// segments computed by CalcBudget().
TierGrowth float64
// The number of segments in any resulting MergeTask. e.g.,
// len(result.Tasks[ * ].Segments) == SegmentsPerMergeTask.
SegmentsPerMergeTask int
// Small segments are rounded up to this size, i.e., treated as
// equal (floor) size for consideration. This is to prevent lots
// of tiny segments from resulting in a long tail in the index.
FloorSegmentSize int64
// Small segments' file sizes are rounded up to this size to prevent
// lots of tiny segments from causing a long tail in the index. When
// this is > 0 the planner budgets on file sizes; otherwise it budgets
// on live sizes.
FloorSegmentFileSize int64
// Controls how aggressively merges that reclaim more deletions
// are favored. Higher values will more aggressively target
// merges that reclaim deletions, but be careful not to go so high
// that way too much merging takes place; a value of 3.0 is
// probably nearly too high. A value of 0.0 means deletions don't
// impact merge selection.
ReclaimDeletesWeight float64
// Optional, defaults to mergeplan.CalcBudget().
CalcBudget func(totalSize int64, firstTierSize int64,
o *MergePlanOptions) (budgetNumSegments int)
// Optional, defaults to mergeplan.ScoreSegments().
ScoreSegments func(segments []Segment, o *MergePlanOptions) float64
// Optional.
Logger func(string)
}
// RaiseToFloorSegmentSize returns s, raised to FloorSegmentSize when s is
// not larger than that floor.
func (o *MergePlanOptions) RaiseToFloorSegmentSize(s int64) int64 {
	floor := o.FloorSegmentSize
	if s <= floor {
		return floor
	}
	return s
}
// RaiseToFloorSegmentFileSize returns s, raised to FloorSegmentFileSize when
// s is not larger than that floor.
func (o *MergePlanOptions) RaiseToFloorSegmentFileSize(s int64) int64 {
	floor := o.FloorSegmentFileSize
	if s <= floor {
		return floor
	}
	return s
}
// MaxSegmentSizeLimit is the largest value accepted for
// MergePlanOptions.MaxSegmentSize (2^31 - 1). The limit is attributed to the
// hit-1 optimisation / maximum encoding limit of 31 bits.
const MaxSegmentSizeLimit = 1<<31 - 1
// ErrMaxSegmentSizeTooLarge is returned by ValidateMergePlannerOptions when
// the configured MaxSegmentSize exceeds MaxSegmentSizeLimit.
var ErrMaxSegmentSizeTooLarge = errors.New("MaxSegmentSize exceeds the size limit")
// DefaultMergePlanOptions suggests the default options. It is also what
// plan() falls back to when called with nil options. FloorSegmentFileSize is
// left at zero here, so planning with these options budgets on live sizes.
var DefaultMergePlanOptions = MergePlanOptions{
MaxSegmentsPerTier: 10,
MaxSegmentSize: 5000000,
MaxSegmentFileSize: 4000000000, // 4GB
TierGrowth: 10.0,
SegmentsPerMergeTask: 10,
FloorSegmentSize: 2000,
ReclaimDeletesWeight: 2.0,
}
// SingleSegmentMergePlanOptions helps in creating a
// single segment index: one segment per tier, no tier growth, and floor
// sizes as large as the configured maximums.
var SingleSegmentMergePlanOptions = MergePlanOptions{
MaxSegmentsPerTier: 1,
MaxSegmentSize: 1 << 30,
MaxSegmentFileSize: 1 << 40,
TierGrowth: 1.0,
SegmentsPerMergeTask: 10,
FloorSegmentSize: 1 << 30,
ReclaimDeletesWeight: 2.0,
FloorSegmentFileSize: 1 << 40,
}
// -------------------------------------------
// plan is the implementation behind Plan().
//
// It returns a nil plan when there are fewer than two segments. Otherwise it
// works on a copy of segmentsIn sorted by descending live size:
//
//  1. Segments smaller than half of MaxSegmentSize are eligible for merging;
//     segments with vectors must also be smaller than half of
//     MaxSegmentFileSize (when that limit is > 0).
//  2. A budget of allowed segments is computed, from file sizes when
//     FloorSegmentFileSize > 0 and from live sizes otherwise.
//  3. Eligible segments without live data are grouped into one task.
//  4. While over budget, the best-scoring roster of up to
//     SegmentsPerMergeTask segments is picked (lower score wins) and turned
//     into a task.
//
// A nil o selects DefaultMergePlanOptions. The returned error is always nil.
func plan(segmentsIn []Segment, o *MergePlanOptions) (*MergePlan, error) {
	if len(segmentsIn) <= 1 {
		return nil, nil
	}

	if o == nil {
		o = &DefaultMergePlanOptions
	}

	segments := append([]Segment(nil), segmentsIn...) // Copy.

	sort.Sort(byLiveSizeDescending(segments))

	var minLiveSize int64 = math.MaxInt64

	var eligibles []Segment
	var eligiblesLiveSize int64
	var eligiblesFileSize int64
	var minFileSize int64 = math.MaxInt64

	for _, segment := range segments {
		if minLiveSize > segment.LiveSize() {
			minLiveSize = segment.LiveSize()
		}
		if minFileSize > segment.FileSize() {
			minFileSize = segment.FileSize()
		}

		isEligible := segment.LiveSize() < o.MaxSegmentSize/2
		// An eligible segment (based on #documents) may be too large
		// and thus need a stricter check based on the file size.
		// This is particularly important for segments that contain
		// vectors.
		if isEligible && segment.HasVector() && o.MaxSegmentFileSize > 0 {
			isEligible = segment.FileSize() < o.MaxSegmentFileSize/2
		}

		// Only small-enough segments are eligible.
		if isEligible {
			eligibles = append(eligibles, segment)
			eligiblesLiveSize += segment.LiveSize()
			eligiblesFileSize += segment.FileSize()
		}
	}

	calcBudget := o.CalcBudget
	if calcBudget == nil {
		calcBudget = CalcBudget
	}

	// The first tier size is the smallest observed size raised to the
	// configured floor; file sizes are used when a file-size floor is set.
	var budgetNumSegments int
	if o.FloorSegmentFileSize > 0 {
		minFileSize = o.RaiseToFloorSegmentFileSize(minFileSize)
		budgetNumSegments = calcBudget(eligiblesFileSize, minFileSize, o)
	} else {
		minLiveSize = o.RaiseToFloorSegmentSize(minLiveSize)
		budgetNumSegments = calcBudget(eligiblesLiveSize, minLiveSize, o)
	}

	scoreSegments := o.ScoreSegments
	if scoreSegments == nil {
		scoreSegments = ScoreSegments
	}

	rv := &MergePlan{}

	// Segments without live data are collected into a task of their own and
	// taken out of further consideration.
	var empties []Segment
	for _, eligible := range eligibles {
		if eligible.LiveSize() <= 0 {
			empties = append(empties, eligible)
		}
	}
	if len(empties) > 0 {
		rv.Tasks = append(rv.Tasks, &MergeTask{Segments: empties})
		eligibles = removeSegments(eligibles, empties)
	}

	// While we’re over budget, keep looping, which might produce
	// another MergeTask.
	for len(eligibles) > 0 && (len(eligibles)+len(rv.Tasks)) > budgetNumSegments {
		// Track a current best roster as we examine and score
		// potential rosters of merges.
		var bestRoster []Segment
		var bestRosterScore float64 // Lower score is better.

		for startIdx := 0; startIdx < len(eligibles); startIdx++ {
			var roster []Segment
			var rosterLiveSize int64
			var rosterFileSize int64 // useful for segments with vectors

			for idx := startIdx; idx < len(eligibles) && len(roster) < o.SegmentsPerMergeTask; idx++ {
				eligible := eligibles[idx]

				if rosterLiveSize+eligible.LiveSize() >= o.MaxSegmentSize {
					continue
				}

				// Apply the file-size cap only when one is configured,
				// matching the eligibility check above. Without this
				// guard a zero MaxSegmentFileSize would keep every
				// segment with vectors out of every roster.
				if eligible.HasVector() && o.MaxSegmentFileSize > 0 {
					efs := eligible.FileSize()
					if rosterFileSize+efs >= o.MaxSegmentFileSize {
						continue
					}
					rosterFileSize += efs
				}

				roster = append(roster, eligible)
				rosterLiveSize += eligible.LiveSize()
			}

			if len(roster) > 0 {
				rosterScore := scoreSegments(roster, o)

				if len(bestRoster) == 0 || rosterScore < bestRosterScore {
					bestRoster = roster
					bestRosterScore = rosterScore
				}
			}
		}

		if len(bestRoster) == 0 {
			return rv, nil
		}

		// create tasks with valid merges - i.e. there should be at least 2 non-empty segments
		if len(bestRoster) > 1 {
			rv.Tasks = append(rv.Tasks, &MergeTask{Segments: bestRoster})
		}

		// The roster is removed even when it was too small to become a
		// task, which guarantees that the loop makes progress.
		eligibles = removeSegments(eligibles, bestRoster)
	}

	return rv, nil
}
// CalcBudget returns how many segments are needed to hold totalSize when
// segments are laid out on a staircase of tiers: each tier holds at most
// MaxSegmentsPerTier segments, the first tier's segments have size
// firstTierSize, and every further tier is TierGrowth times larger.
//
// firstTierSize and MaxSegmentsPerTier are clamped to at least 1, TierGrowth
// to at least 1.0.
func CalcBudget(totalSize int64, firstTierSize int64, o *MergePlanOptions) (
	budgetNumSegments int) {
	perTier := o.MaxSegmentsPerTier
	if perTier < 1 {
		perTier = 1
	}

	growth := o.TierGrowth
	if growth < 1.0 {
		growth = 1.0
	}

	step := firstTierSize
	if step < 1 {
		step = 1
	}

	remaining := totalSize
	for remaining > 0 {
		fill := float64(remaining) / float64(step)
		if fill < float64(perTier) {
			// The rest fits into a partially filled tier.
			budgetNumSegments += int(math.Ceil(fill))
			break
		}

		// Fill this tier completely and climb to the next one.
		budgetNumSegments += perTier
		remaining -= int64(perTier) * step
		step = int64(float64(step) * growth)
	}

	return budgetNumSegments
}
// removeSegments returns the entries of segments that do not appear in
// toRemove. The relative order of the surviving entries is preserved.
func removeSegments(segments []Segment, toRemove []Segment) []Segment {
	kept := make([]Segment, 0, len(segments)-len(toRemove))
	for _, candidate := range segments {
		drop := false
		for _, victim := range toRemove {
			if candidate == victim {
				drop = true
				break
			}
		}
		if !drop {
			kept = append(kept, candidate)
		}
	}
	return kept
}
// ScoreSegments rates a candidate roster of segments; a smaller score marks
// a better merge. It returns 0 when any of the summed sizes is not positive.
func ScoreSegments(segments []Segment, o *MergePlanOptions) float64 {
	var (
		fullTotal        int64 // sum of sizes before the merge
		liveTotal        int64 // sum of sizes after the merge
		liveFlooredTotal int64 // as liveTotal, each size raised to the floor
	)
	for _, seg := range segments {
		fullTotal += seg.FullSize()
		liveTotal += seg.LiveSize()
		liveFlooredTotal += o.RaiseToFloorSegmentSize(seg.LiveSize())
	}

	if fullTotal <= 0 || liveTotal <= 0 || liveFlooredTotal <= 0 {
		return 0
	}

	// Rough "balance" estimate: the share of the first segment in the
	// floored total tells whether the segments are about the same size.
	firstFloored := o.RaiseToFloorSegmentSize(segments[0].LiveSize())
	balance := float64(firstFloored) / float64(liveFlooredTotal)

	// Gently prefer smaller merges. The exponent is kept small so that poor
	// merges of small segments are not chosen just to dodge large merges.
	sizeFactor := math.Pow(float64(liveTotal), 0.05)

	// Strongly prefer merges that reclaim deletes.
	nonDelRatio := float64(liveTotal) / float64(fullTotal)
	reclaimFactor := math.Pow(nonDelRatio, o.ReclaimDeletesWeight)

	return balance * sizeFactor * reclaimFactor
}
// ------------------------------------------
// ToBarChart renders the segments, one line each, as an ASCII bar chart.
// barMax is the maximum bar width; bars are scaled down when the largest
// full size exceeds it. Segments that are part of the plan are flagged with
// "*" and drawn using the index of their task; all others are drawn with
// ".". The deleted part of a segment (full minus live) is drawn with "x".
func ToBarChart(prefix string, barMax int, segments []Segment, plan *MergePlan) string {
	var maxFullSize int64
	for _, seg := range segments {
		if maxFullSize < seg.FullSize() {
			maxFullSize = seg.FullSize()
		}
	}
	if maxFullSize < 0 {
		maxFullSize = 1
	}
	scale := maxFullSize > int64(barMax)

	// taskOf reports the index of the first task containing seg, if any.
	taskOf := func(seg Segment) (int, bool) {
		if plan == nil {
			return 0, false
		}
		for taski, task := range plan.Tasks {
			for _, member := range task.Segments {
				if member == seg {
					return taski, true
				}
			}
		}
		return 0, false
	}

	lines := make([]string, 0, len(segments))
	for _, seg := range segments {
		fullWidth := int(seg.FullSize())
		liveWidth := int(seg.LiveSize())
		if scale {
			fullWidth = int(float64(barMax) * float64(fullWidth) / float64(maxFullSize))
			liveWidth = int(float64(barMax) * float64(liveWidth) / float64(maxFullSize))
		}

		kind, fill := " ", "."
		if taski, found := taskOf(seg); found {
			kind = "*"
			fill = fmt.Sprintf("%d", taski)
		}

		// fill may be wider than one character (task index >= 10), hence
		// the truncation to the intended width.
		deadWidth := fullWidth - liveWidth
		livePart := strings.Repeat(fill, liveWidth)[0:liveWidth]
		deadPart := strings.Repeat("x", deadWidth)[0:deadWidth]

		lines = append(lines, fmt.Sprintf("%s %5d: %5d /%5d - %s %s", prefix,
			seg.Id(),
			seg.LiveSize(),
			seg.FullSize(),
			kind, livePart+deadPart))
	}

	return strings.Join(lines, "\n")
}
// ValidateMergePlannerOptions checks the merge planner options and returns
// ErrMaxSegmentSizeTooLarge when MaxSegmentSize is above
// MaxSegmentSizeLimit, nil otherwise.
func ValidateMergePlannerOptions(options *MergePlanOptions) error {
	if options.MaxSegmentSize <= MaxSegmentSizeLimit {
		return nil
	}
	return ErrMaxSegmentSizeTooLarge
}
================================================
FILE: index/scorch/mergeplan/merge_plan_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mergeplan
import (
"encoding/json"
"fmt"
"math/rand"
"os"
"reflect"
"sort"
"testing"
"time"
)
// segment is a plain-data implementation of the Segment interface used by
// the tests; every accessor simply returns the matching field.
type segment struct {
	MyId        uint64
	MyFullSize  int64
	MyLiveSize  int64
	MyHasVector bool
	MyFileSize  int64
}

// Id returns MyId.
func (sg *segment) Id() uint64 {
	return sg.MyId
}

// FullSize returns MyFullSize.
func (sg *segment) FullSize() int64 {
	return sg.MyFullSize
}

// LiveSize returns MyLiveSize.
func (sg *segment) LiveSize() int64 {
	return sg.MyLiveSize
}

// HasVector returns MyHasVector.
func (sg *segment) HasVector() bool {
	return sg.MyHasVector
}

// FileSize returns MyFileSize.
func (sg *segment) FileSize() int64 {
	return sg.MyFileSize
}
// makeLinearSegments returns n test segments where segment i has id i and
// both its full and live size equal to i.
func makeLinearSegments(n int) (rv []Segment) {
	i := 0
	for i < n {
		size := int64(i)
		seg := &segment{
			MyId:       uint64(i),
			MyFullSize: size,
			MyLiveSize: size,
		}
		rv = append(rv, seg)
		i++
	}
	return rv
}
// ----------------------------------------
// TestSimplePlan runs Plan() over a handful of small, hand-written inputs and
// compares both the returned error and the returned plan with expectations.
func TestSimplePlan(t *testing.T) {
	segs := makeLinearSegments(10)

	// oneTask builds the expected plan for cases that yield a single task.
	oneTask := func(members ...Segment) *MergePlan {
		return &MergePlan{
			Tasks: []*MergeTask{
				{Segments: members},
			},
		}
	}

	tests := []struct {
		Desc       string
		Segments   []Segment
		Options    *MergePlanOptions
		ExpectPlan *MergePlan
		ExpectErr  error
	}{
		{
			Desc: "nil segments",
		},
		{
			Desc:     "empty segments",
			Segments: []Segment{},
		},
		{
			Desc:     "1 segment",
			Segments: []Segment{segs[1]},
		},
		{
			Desc:       "2 segments",
			Segments:   []Segment{segs[1], segs[2]},
			ExpectPlan: oneTask(segs[2], segs[1]),
		},
		{
			Desc:       "3 segments",
			Segments:   []Segment{segs[1], segs[2], segs[9]},
			ExpectPlan: oneTask(segs[9], segs[2], segs[1]),
		},
		{
			Desc: "many segments",
			Segments: []Segment{
				segs[1], segs[2], segs[3],
				segs[4], segs[5], segs[6],
			},
			Options: &MergePlanOptions{
				MaxSegmentsPerTier:   1,
				MaxSegmentSize:       1000,
				TierGrowth:           2.0,
				SegmentsPerMergeTask: 2,
				FloorSegmentSize:     1,
			},
			ExpectPlan: oneTask(segs[6], segs[5]),
		},
	}

	for testi, test := range tests {
		got, gotErr := Plan(test.Segments, test.Options)

		if gotErr != test.ExpectErr {
			testj, _ := json.Marshal(&test)
			t.Errorf("testi: %d, test: %s, got err: %v", testi, testj, gotErr)
		}

		if reflect.DeepEqual(got, test.ExpectPlan) {
			continue
		}
		testj, _ := json.Marshal(&test)
		planj, _ := json.Marshal(&got)
		t.Errorf("testi: %d, test: %s, got plan: %s",
			testi, testj, planj)
	}
}
// ----------------------------------------
// TestSort checks that byLiveSizeDescending orders segments by strictly
// decreasing live size.
func TestSort(t *testing.T) {
	segs := makeLinearSegments(10)

	sort.Sort(byLiveSizeDescending(segs))

	for i := 0; i+1 < len(segs); i++ {
		prev, cur := segs[i], segs[i+1]
		if cur.LiveSize() >= prev.LiveSize() {
			t.Errorf("not descending")
		}
	}
}
// ----------------------------------------
// TestCalcBudget checks CalcBudget against a table of total sizes, first
// tier sizes and options.
func TestCalcBudget(t *testing.T) {
	// Options shared by several rows; they differ only in the tier width.
	narrow := MergePlanOptions{
		MaxSegmentsPerTier:   1,
		MaxSegmentSize:       1000,
		TierGrowth:           2.0,
		SegmentsPerMergeTask: 2,
		FloorSegmentSize:     1,
	}
	wide := narrow
	wide.MaxSegmentsPerTier = 2

	tests := []struct {
		totalSize     int64
		firstTierSize int64
		o             MergePlanOptions
		expect        int
	}{
		// zero-valued options: every clamp in CalcBudget kicks in
		{totalSize: 0, firstTierSize: 0, o: MergePlanOptions{}, expect: 0},
		{totalSize: 1, firstTierSize: 0, o: MergePlanOptions{}, expect: 1},
		{totalSize: 9, firstTierSize: 0, o: MergePlanOptions{}, expect: 9},

		// small hand-computed staircases
		{totalSize: 1, firstTierSize: 1, o: narrow, expect: 1},
		{totalSize: 21, firstTierSize: 1, o: narrow, expect: 5},
		{totalSize: 21, firstTierSize: 1, o: wide, expect: 7},

		// default options with growing totals
		{totalSize: 1000, firstTierSize: 2000, o: DefaultMergePlanOptions, expect: 1},
		{totalSize: 5000, firstTierSize: 2000, o: DefaultMergePlanOptions, expect: 3},
		{totalSize: 10000, firstTierSize: 2000, o: DefaultMergePlanOptions, expect: 5},
		{totalSize: 30000, firstTierSize: 2000, o: DefaultMergePlanOptions, expect: 11},
		{totalSize: 1000000, firstTierSize: 2000, o: DefaultMergePlanOptions, expect: 24},
		{totalSize: 1000000000, firstTierSize: 2000, o: DefaultMergePlanOptions, expect: 54},
	}

	for testi, test := range tests {
		res := CalcBudget(test.totalSize, test.firstTierSize, &test.o)
		if res == test.expect {
			continue
		}
		t.Errorf("testi: %d, test: %#v, res: %v",
			testi, test, res)
	}
}
// TestCalcBudgetForSingleSegmentMergePolicy checks that a policy whose floor
// equals its maximum segment size budgets at most a single segment.
func TestCalcBudgetForSingleSegmentMergePolicy(t *testing.T) {
	mpolicy := MergePlanOptions{
		MaxSegmentsPerTier:   1,
		MaxSegmentSize:       1 << 30, // ~ 1 Billion
		SegmentsPerMergeTask: 10,
		FloorSegmentSize:     1 << 30,
	}

	// floor raises a size to the policy's FloorSegmentSize.
	floor := func(size int64) int64 {
		return mpolicy.RaiseToFloorSegmentSize(size)
	}

	tests := []struct {
		totalSize     int64
		firstTierSize int64
		o             MergePlanOptions
		expect        int
	}{
		{totalSize: 0, firstTierSize: floor(0), o: mpolicy, expect: 0},
		{totalSize: 1, firstTierSize: floor(1), o: mpolicy, expect: 1},
		{totalSize: 9, firstTierSize: floor(0), o: mpolicy, expect: 1},
		{totalSize: 1, firstTierSize: floor(1), o: mpolicy, expect: 1},
		{totalSize: 21, firstTierSize: floor(21), o: mpolicy, expect: 1},
		{totalSize: 21, firstTierSize: floor(21), o: mpolicy, expect: 1},
		{totalSize: 1000, firstTierSize: floor(2000), o: mpolicy, expect: 1},
		{totalSize: 5000, firstTierSize: floor(5000), o: mpolicy, expect: 1},
		{totalSize: 10000, firstTierSize: floor(10000), o: mpolicy, expect: 1},
		{totalSize: 30000, firstTierSize: floor(30000), o: mpolicy, expect: 1},
		{totalSize: 1000000, firstTierSize: floor(1000000), o: mpolicy, expect: 1},
		{totalSize: 1000000000, firstTierSize: 1 << 30, o: mpolicy, expect: 1},
		{totalSize: 1013423541, firstTierSize: 1 << 30, o: mpolicy, expect: 1},
		{totalSize: 98765442, firstTierSize: 1 << 30, o: mpolicy, expect: 1},
	}

	for testi, test := range tests {
		res := CalcBudget(test.totalSize, test.firstTierSize, &test.o)
		if res == test.expect {
			continue
		}
		t.Errorf("testi: %d, test: %#v, res: %v",
			testi, test, res)
	}
}
// ----------------------------------------
func TestInsert1SameSizedSegmentBetweenMerges(t *testing.T) {
o := &MergePlanOptions{
MaxSegmentSize: 1000,
MaxSegmentsPerTier: 3,
TierGrowth: 3.0,
SegmentsPerMergeTask: 3,
}
spec := testCyclesSpec{
descrip: "i1sssbm",
verbose: os.Getenv("VERBOSE") == "i1sssbm" || os.Getenv("VERBOSE") == "y",
n: 200,
o: o,
beforePlan: func(spec *testCyclesSpec) {
spec.segments = append(spec.segments, &segment{
MyId: spec.nextSegmentId,
MyFullSize: 1,
MyLiveSize: 1,
})
spec.nextSegmentId++
},
}
spec.runCycles(t)
}
func TestInsertManySameSizedSegmentsBetweenMerges(t *testing.T) {
o := &MergePlanOptions{
MaxSegmentSize: 1000,
MaxSegmentsPerTier: 3,
TierGrowth: 3.0,
SegmentsPerMergeTask: 3,
}
spec := testCyclesSpec{
descrip: "imsssbm",
verbose: os.Getenv("VERBOSE") == "imsssbm" || os.Getenv("VERBOSE") == "y",
n: 20,
o: o,
beforePlan: func(spec *testCyclesSpec) {
for i := 0; i < 10; i++ {
spec.segments = append(spec.segments, &segment{
MyId: spec.nextSegmentId,
MyFullSize: 1,
MyLiveSize: 1,
})
spec.nextSegmentId++
}
},
}
spec.runCycles(t)
}
func TestInsertManySameSizedSegmentsWithDeletionsBetweenMerges(t *testing.T) {
o := &MergePlanOptions{
MaxSegmentSize: 1000,
MaxSegmentsPerTier: 3,
TierGrowth: 3.0,
SegmentsPerMergeTask: 3,
}
spec := testCyclesSpec{
descrip: "imssswdbm",
verbose: os.Getenv("VERBOSE") == "imssswdbm" || os.Getenv("VERBOSE") == "y",
n: 20,
o: o,
beforePlan: func(spec *testCyclesSpec) {
for i := 0; i < 10; i++ {
// Deletions are a shrinking of the live size.
for i, seg := range spec.segments {
if (spec.cycle+i)%5 == 0 {
s := seg.(*segment)
if s.MyLiveSize > 0 {
s.MyLiveSize -= 1
}
}
}
spec.segments = append(spec.segments, &segment{
MyId: spec.nextSegmentId,
MyFullSize: 1,
MyLiveSize: 1,
})
spec.nextSegmentId++
}
},
}
spec.runCycles(t)
}
func TestInsertManyDifferentSizedSegmentsBetweenMerges(t *testing.T) {
o := &MergePlanOptions{
MaxSegmentSize: 1000,
MaxSegmentsPerTier: 3,
TierGrowth: 3.0,
SegmentsPerMergeTask: 3,
}
spec := testCyclesSpec{
descrip: "imdssbm",
verbose: os.Getenv("VERBOSE") == "imdssbm" || os.Getenv("VERBOSE") == "y",
n: 20,
o: o,
beforePlan: func(spec *testCyclesSpec) {
for i := 0; i < 10; i++ {
spec.segments = append(spec.segments, &segment{
MyId: spec.nextSegmentId,
MyFullSize: int64(1 + (i % 5)),
MyLiveSize: int64(1 + (i % 5)),
})
spec.nextSegmentId++
}
},
}
spec.runCycles(t)
}
func TestManySameSizedSegmentsWithDeletesBetweenMerges(t *testing.T) {
o := &MergePlanOptions{
MaxSegmentSize: 1000,
MaxSegmentsPerTier: 3,
TierGrowth: 3.0,
SegmentsPerMergeTask: 3,
}
var numPlansWithTasks int
spec := testCyclesSpec{
descrip: "mssswdbm",
verbose: os.Getenv("VERBOSE") == "mssswdbm" || os.Getenv("VERBOSE") == "y",
n: 20,
o: o,
beforePlan: func(spec *testCyclesSpec) {
// Deletions are a shrinking of the live size.
for i, seg := range spec.segments {
if (spec.cycle+i)%5 == 0 {
s := seg.(*segment)
if s.MyLiveSize > 0 {
s.MyLiveSize -= 1
}
}
}
for i := 0; i < 10; i++ {
spec.segments = append(spec.segments, &segment{
MyId: spec.nextSegmentId,
MyFullSize: 1,
MyLiveSize: 1,
})
spec.nextSegmentId++
}
},
afterPlan: func(spec *testCyclesSpec, plan *MergePlan) {
if plan != nil && len(plan.Tasks) > 0 {
numPlansWithTasks++
}
},
}
spec.runCycles(t)
if numPlansWithTasks <= 0 {
t.Errorf("expected some plans with tasks")
}
}
// TestValidateMergePlannerOptions checks that ValidateMergePlannerOptions
// returns ErrMaxSegmentSizeTooLarge for a MaxSegmentSize of 1<<32.
func TestValidateMergePlannerOptions(t *testing.T) {
	oversized := &MergePlanOptions{
		MaxSegmentsPerTier:   3,
		SegmentsPerMergeTask: 3,
		TierGrowth:           3.0,
		MaxSegmentSize:       1 << 32,
	}

	if got := ValidateMergePlannerOptions(oversized); got != ErrMaxSegmentSizeTooLarge {
		t.Error("Validation expected to fail as the MaxSegmentSize exceeds limit")
	}
}
// TestPlanMaxSegmentSizeLimit plans merges over makeLinearSegments(20) for 19
// iterations, each with a random MaxSegmentSize in [5, 20), and checks that
// every plan has at least one task and that the summed live size of each
// task's segments stays below the configured MaxSegmentSize.
func TestPlanMaxSegmentSizeLimit(t *testing.T) {
	o := &MergePlanOptions{
		MaxSegmentSize:       20,
		MaxSegmentsPerTier:   5,
		TierGrowth:           3.0,
		SegmentsPerMergeTask: 5,
		FloorSegmentSize:     5,
	}
	segments := makeLinearSegments(20)

	r := rand.New(rand.NewSource(time.Now().UnixNano()))

	// Named so as not to shadow the min/max builtins.
	const (
		minSize = 5
		maxSize = 20
	)
	randomInRange := func() int64 {
		return int64(r.Intn(maxSize-minSize) + minSize)
	}

	for i := 1; i < 20; i++ {
		o.MaxSegmentSize = randomInRange()
		plans, err := Plan(segments, o)
		if err != nil {
			// Stop here: the plan is not usable once Plan reports an error.
			t.Fatalf("Plan failed, err: %v", err)
		}
		if len(plans.Tasks) == 0 {
			t.Errorf("expected some plans with tasks")
		}

		for _, task := range plans.Tasks {
			var totalLiveSize int64
			for _, segs := range task.Segments {
				totalLiveSize += segs.LiveSize()
			}
			if totalLiveSize >= o.MaxSegmentSize {
				t.Errorf("merged segments size: %d exceeding the MaxSegmentSize "+
					"limit: %d", totalLiveSize, o.MaxSegmentSize)
			}
		}
	}
}
// ----------------------------------------
// testCyclesSpec holds the configuration and mutable state of a repeated
// plan-and-merge simulation executed by runCycles.
type testCyclesSpec struct {
// descrip is the label passed to emit and ToBarChart for verbose output.
descrip string
// verbose makes runCycles call emit before and after each planning step.
verbose bool
n int // Number of cycles to run.
// o is the set of options handed to Plan on every cycle.
o *MergePlanOptions
// beforePlan, when non-nil, is invoked ahead of Plan in each cycle.
beforePlan func(*testCyclesSpec)
// afterPlan, when non-nil, is invoked with the plan returned by Plan.
afterPlan func(*testCyclesSpec, *MergePlan)
// cycle is the current cycle number; runCycles increments it.
cycle int
// segments is the current segment population given to Plan.
segments []Segment
// nextSegmentId is the id assigned to the next segment that gets created.
nextSegmentId uint64
}
// runCycles repeats the plan/merge simulation until spec.cycle reaches spec.n.
// Each cycle invokes the optional beforePlan hook, calls Plan on the current
// segments, invokes the optional afterPlan hook, and then applies every task
// of the plan: the task's segments are removed and, when their combined live
// size is positive, replaced by a single new segment of that size. The test
// is aborted if Plan returns an error and marked failed if no cycle produced
// a plan with tasks.
func (spec *testCyclesSpec) runCycles(t *testing.T) {
	sawTasks := false

	for ; spec.cycle < spec.n; spec.cycle++ {
		if spec.verbose {
			emit(spec.descrip, spec.cycle, 0, spec.segments, nil)
		}

		if hook := spec.beforePlan; hook != nil {
			hook(spec)
		}

		if spec.verbose {
			emit(spec.descrip, spec.cycle, 1, spec.segments, nil)
		}

		plan, err := Plan(spec.segments, spec.o)
		if err != nil {
			t.Fatalf("expected no err, got: %v", err)
		}

		if hook := spec.afterPlan; hook != nil {
			hook(spec, plan)
		}

		if spec.verbose {
			emit(spec.descrip, spec.cycle, 2, spec.segments, plan)
		}

		if plan == nil {
			continue
		}

		sawTasks = sawTasks || len(plan.Tasks) > 0

		for _, task := range plan.Tasks {
			spec.segments = removeSegments(spec.segments, task.Segments)

			var merged int64
			for _, member := range task.Segments {
				merged += member.LiveSize()
			}
			if merged <= 0 {
				// Nothing live survived the merge, so no segment replaces it.
				continue
			}

			spec.segments = append(spec.segments, &segment{
				MyId:       spec.nextSegmentId,
				MyFullSize: merged,
				MyLiveSize: merged,
			})
			spec.nextSegmentId++
		}
	}

	if !sawTasks {
		t.Errorf("expected some plans with tasks")
	}
}
// emit prints a header line and the ToBarChart rendering of segments and plan
// to stdout. It is a no-op when the VERBOSE environment variable is empty.
// The header ends with "hasPlan" when plan is non-nil and has tasks.
func emit(descrip string, cycle int, step int, segments []Segment, plan *MergePlan) {
	if len(os.Getenv("VERBOSE")) == 0 {
		return
	}

	var suffix string
	if hasTasks := plan != nil && len(plan.Tasks) > 0; hasTasks {
		suffix = "hasPlan"
	}

	fmt.Printf("%s %d.%d ---------- %s\n", descrip, cycle, step, suffix)

	chart := ToBarChart(descrip, 100, segments, plan)
	fmt.Printf("%s\n", chart)
}
// -----------------------------------------------------------------------------
// Test Vector Segment Merging
// TestPlanMaxSegmentFileSize checks that Plan honours MaxSegmentFileSize for
// segments that carry vectors. Each table entry lists the input segments, the
// options, and the segment ids expected in each resulting merge task, in
// order. Both the number of tasks and the ids of every task are compared.
func TestPlanMaxSegmentFileSize(t *testing.T) {
	tests := []struct {
		segments      []Segment
		o             *MergePlanOptions
		expectedTasks [][]uint64
	}{
		{
			[]Segment{
				&segment{ // ineligible
					MyId:        1,
					MyFullSize:  4000,
					MyLiveSize:  3900,
					MyHasVector: true,
					MyFileSize:  3900 * 1000 * 4, // > 2MB
				},
				&segment{ // ineligible
					MyId:        2,
					MyFullSize:  6000,
					MyLiveSize:  5500, // > 5000
					MyHasVector: true,
					MyFileSize:  5500 * 1000 * 4, // > 2MB
				},
				&segment{ // eligible
					MyId:        3,
					MyFullSize:  500,
					MyLiveSize:  490,
					MyHasVector: true,
					MyFileSize:  490 * 1000 * 4,
				},
				&segment{ // eligible
					MyId:        4,
					MyFullSize:  500,
					MyLiveSize:  480,
					MyHasVector: true,
					MyFileSize:  480 * 1000 * 4,
				},
				&segment{ // eligible
					MyId:        5,
					MyFullSize:  500,
					MyLiveSize:  300,
					MyHasVector: true,
					MyFileSize:  300 * 1000 * 4,
				},
				&segment{ // eligible
					MyId:        6,
					MyFullSize:  500,
					MyLiveSize:  400,
					MyHasVector: true,
					MyFileSize:  400 * 1000 * 4,
				},
			},
			&MergePlanOptions{
				MaxSegmentSize: 5000, // number of documents
				// considering vector dimension as 1000
				// vectorBytes = 5000 * 1000 * 4 = 20MB, which is too large
				// So, let's set the fileSize limit to 4MB
				MaxSegmentFileSize:   4000000, // 4MB
				MaxSegmentsPerTier:   1,
				SegmentsPerMergeTask: 2,
				TierGrowth:           2.0,
				FloorSegmentSize:     1,
			},
			[][]uint64{
				{3, 4},
			},
		},
	}

	for testi, test := range tests {
		t.Run(fmt.Sprintf("Test-%d", testi), func(t *testing.T) {
			plans, err := Plan(test.segments, test.o)
			if err != nil {
				t.Fatalf("Plan failed, err: %v", err)
			}

			// Without this check a plan with no tasks would pass vacuously,
			// and a plan with extra tasks would index past expectedTasks.
			if len(plans.Tasks) != len(test.expectedTasks) {
				t.Errorf("expected %d tasks, got: %d",
					len(test.expectedTasks), len(plans.Tasks))
			}

			for i, task := range plans.Tasks {
				var segIDs []uint64
				for _, seg := range task.Segments {
					segIDs = append(segIDs, seg.Id())
				}

				if i >= len(test.expectedTasks) {
					t.Errorf("unexpected extra task %d with segments: %v", i, segIDs)
					continue
				}

				// Compare against the expectation for this task, not always
				// the first one.
				if !reflect.DeepEqual(segIDs, test.expectedTasks[i]) {
					t.Errorf("expected task segments: %v, got: %v", test.expectedTasks[i], segIDs)
				}
			}
		})
	}
}
// TestSingleTaskMergePlan checks that Plan produces no tasks for the two
// segments below when FloorSegmentFileSize is raised to 209715200.
func TestSingleTaskMergePlan(t *testing.T) {
	// Work on a copy of the defaults: writing through a pointer to the
	// package-level DefaultMergePlanOptions would leak the changed
	// FloorSegmentFileSize into every other user of the defaults.
	opts := DefaultMergePlanOptions
	opts.FloorSegmentFileSize = 209715200

	// borrowing the spec values from MB-66112
	//
	// both segments are eligible, but the roster with a single segment is
	// scored higher than the roster with two segments. In this case the merge
	// plan would return a task with a single non-empty segment, which when
	// introduced into the scorch system doesn't cause any change; you'd be
	// stuck in an infinite loop where the plan keeps generating the same task
	// with the same single segment, which doesn't converge the index to a
	// steady state.
	spec := testCyclesSpec{
		descrip: "mssswdbm",
		verbose: os.Getenv("VERBOSE") == "mssswdbm" || os.Getenv("VERBOSE") == "y",
		o:       &opts,
		segments: []Segment{
			&segment{
				MyId:       2,
				MyFullSize: 78059,
				MyLiveSize: 78059,
				MyFileSize: 129475914,
			},
			&segment{
				MyId:       1,
				MyFullSize: 3959,
				MyLiveSize: 3959,
				MyFileSize: 24805725,
			},
		},
	}

	plan, err := Plan(spec.segments, spec.o)
	if err != nil {
		t.Fatalf("Plan failed, err: %v", err)
	}

	if len(plan.Tasks) > 0 {
		t.Fatalf("expected 0 tasks, got: %d", len(plan.Tasks))
	}
}
================================================
FILE: index/scorch/mergeplan/sort.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mergeplan
// byLiveSizeDescending orders segments from largest to smallest live size,
// breaking ties by ascending id. It provides the Len/Swap/Less method set.
type byLiveSizeDescending []Segment

// Len reports the number of segments.
func (a byLiveSizeDescending) Len() int {
	return len(a)
}

// Swap exchanges the segments at positions i and j.
func (a byLiveSizeDescending) Swap(i, j int) {
	a[j], a[i] = a[i], a[j]
}

// Less reports whether the segment at i sorts before the one at j: a larger
// live size comes first, and equal live sizes are ordered by smaller id.
func (a byLiveSizeDescending) Less(i, j int) bool {
	left, right := a[i], a[j]
	if left.LiveSize() == right.LiveSize() {
		return left.Id() < right.Id()
	}
	return left.LiveSize() > right.LiveSize()
}
================================================
FILE: index/scorch/optimize.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"sync/atomic"
"github.com/RoaringBitmap/roaring/v2"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// OptimizeConjunction enables handling of the "conjunction" kind in
// IndexSnapshotTermFieldReader.Optimize.
var OptimizeConjunction = true
// OptimizeConjunctionUnadorned enables handling of the
// "conjunction:unadorned" kind in IndexSnapshotTermFieldReader.Optimize.
var OptimizeConjunctionUnadorned = true
// OptimizeDisjunctionUnadorned enables handling of the
// "disjunction:unadorned" kind in IndexSnapshotTermFieldReader.Optimize.
var OptimizeDisjunctionUnadorned = true
// Optimize registers this term field reader with the optimization named by
// kind, provided the matching package-level toggle is enabled. Supported
// kinds are "conjunction", "conjunction:unadorned" and
// "disjunction:unadorned". For any other kind, or when the toggle is off, it
// returns (nil, nil).
func (s *IndexSnapshotTermFieldReader) Optimize(kind string,
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	switch kind {
	case "conjunction":
		if OptimizeConjunction {
			return s.optimizeConjunction(octx)
		}
	case "conjunction:unadorned":
		if OptimizeConjunctionUnadorned {
			return s.optimizeConjunctionUnadorned(octx)
		}
	case "disjunction:unadorned":
		if OptimizeDisjunctionUnadorned {
			return s.optimizeDisjunctionUnadorned(octx)
		}
	}

	return nil, nil
}
// OptimizeDisjunctionUnadornedMinChildCardinality is a tunable uint64 that
// defaults to 256.
// NOTE(review): presumably a minimum child cardinality threshold for the
// unadorned disjunction optimization — confirm where (or whether) it is
// still consulted.
var OptimizeDisjunctionUnadornedMinChildCardinality = uint64(256)
// ----------------------------------------------------------------
// optimizeConjunction adds s to a conjunction optimization context. A nil
// octx starts a new OptimizeTFRConjunction bound to s's snapshot. A context of
// any other concrete type is returned unchanged. An error is returned when the
// context belongs to a different snapshot than s.
func (s *IndexSnapshotTermFieldReader) optimizeConjunction(
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	var conj *OptimizeTFRConjunction

	switch existing := octx.(type) {
	case nil:
		conj = &OptimizeTFRConjunction{snapshot: s.snapshot}
	case *OptimizeTFRConjunction:
		conj = existing
	default:
		return octx, nil
	}

	if conj.snapshot != s.snapshot {
		return nil, fmt.Errorf("tried to optimize conjunction across different snapshots")
	}

	conj.tfrs = append(conj.tfrs, s)
	return conj, nil
}
// OptimizeTFRConjunction collects the term field readers of one snapshot that
// participate in a conjunction optimization.
type OptimizeTFRConjunction struct {
// snapshot is the index snapshot every collected reader must belong to.
snapshot *IndexSnapshot
// tfrs are the readers registered so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
}
// Finish applies the conjunction optimization once all readers have been
// registered. With fewer than two readers it does nothing. Otherwise, for each
// segment of the snapshot where the first two readers both expose a non-nil
// actual bitmap, it ANDs together the actual bitmaps of all readers that
// expose one and installs the result as the actual bitmap of each of those
// readers' iterators. It always returns (nil, nil).
func (o *OptimizeTFRConjunction) Finish() (index.Optimized, error) {
	if len(o.tfrs) < 2 {
		return nil, nil
	}

	// actualOf returns tfr's postings iterator for the given segment together
	// with its actual bitmap. The bitmap is nil when the iterator is not
	// optimizable or has no actual bitmap.
	actualOf := func(tfr *IndexSnapshotTermFieldReader, segIdx int) (
		segment.OptimizablePostingsIterator, *roaring.Bitmap) {
		itr, ok := tfr.iterators[segIdx].(segment.OptimizablePostingsIterator)
		if !ok {
			return nil, nil
		}
		return itr, itr.ActualBitmap()
	}

	for segIdx := range o.snapshot.segment {
		_, first := actualOf(o.tfrs[0], segIdx)
		if first == nil {
			continue
		}

		_, second := actualOf(o.tfrs[1], segIdx)
		if second == nil {
			continue
		}

		shared := roaring.And(first, second)

		for _, tfr := range o.tfrs[2:] {
			if _, extra := actualOf(tfr, segIdx); extra != nil {
				shared.And(extra)
			}
		}

		// in this conjunction optimization, the postings iterators
		// will all share the same AND'ed together actual bitmap. The
		// regular conjunction searcher machinery will still be used,
		// but the underlying bitmap will be smaller.
		for _, tfr := range o.tfrs {
			if itr, current := actualOf(tfr, segIdx); current != nil {
				itr.ReplaceActual(shared)
			}
		}
	}

	return nil, nil
}
// ----------------------------------------------------------------
// optimizeConjunctionUnadorned adds s to an unadorned conjunction optimization
// context. An "unadorned" conjunction optimization is appropriate when
// additional or subsidiary information like freq-norm's and term-vectors are
// not required, and instead only the internal-id's are needed.
//
// A nil octx starts a new OptimizeTFRConjunctionUnadorned bound to s's
// snapshot. A context of any other concrete type yields (nil, nil). An error
// is returned when the context belongs to a different snapshot than s.
func (s *IndexSnapshotTermFieldReader) optimizeConjunctionUnadorned(
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	var conj *OptimizeTFRConjunctionUnadorned

	switch existing := octx.(type) {
	case nil:
		conj = &OptimizeTFRConjunctionUnadorned{snapshot: s.snapshot}
	case *OptimizeTFRConjunctionUnadorned:
		conj = existing
	default:
		return nil, nil
	}

	if conj.snapshot != s.snapshot {
		return nil, fmt.Errorf("tried to optimize unadorned conjunction across different snapshots")
	}

	conj.tfrs = append(conj.tfrs, s)
	return conj, nil
}
// OptimizeTFRConjunctionUnadorned collects the term field readers of one
// snapshot that participate in an unadorned conjunction optimization.
type OptimizeTFRConjunctionUnadorned struct {
// snapshot is the index snapshot every collected reader must belong to.
snapshot *IndexSnapshot
// tfrs are the readers registered so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
}
// OptimizeTFRConjunctionUnadornedTerm is the artificial term given to the
// reader produced by OptimizeTFRConjunctionUnadorned.Finish.
var OptimizeTFRConjunctionUnadornedTerm = []byte("")
// OptimizeTFRConjunctionUnadornedField is the artificial field given to the
// reader produced by OptimizeTFRConjunctionUnadorned.Finish.
var OptimizeTFRConjunctionUnadornedField = "*"
// Finish of an unadorned conjunction optimization will compute a
// termFieldReader with an "actual" bitmap that represents the
// constituent bitmaps AND'ed together. This termFieldReader cannot
// provide any freq-norm or termVector associated information.
//
// It returns (nil, nil) when fewer than two readers were registered, or when
// any non-empty constituent iterator does not implement
// segment.OptimizablePostingsIterator. Otherwise it returns the new reader
// and bumps the TotTermSearchersStarted stat by one.
func (o *OptimizeTFRConjunctionUnadorned) Finish() (rv index.Optimized, err error) {
if len(o.tfrs) <= 1 {
return nil, nil
}
// We use an artificial term and field because the optimized
// termFieldReader can represent multiple terms and fields.
oTFR := o.snapshot.unadornedTermFieldReader(
OptimizeTFRConjunctionUnadornedTerm, OptimizeTFRConjunctionUnadornedField)
var actualBMs []*roaring.Bitmap // Collected from regular posting lists.
// Each iteration of OUTER decides the result iterator for one segment.
OUTER:
for i := range o.snapshot.segment {
// Reuse the backing array across segments.
actualBMs = actualBMs[:0]
// docNum1HitLast/docNum1HitLastOk track the doc number shared by all
// 1-hit iterators seen so far in this segment, if any.
var docNum1HitLast uint64
var docNum1HitLastOk bool
for _, tfr := range o.tfrs {
if _, ok := tfr.iterators[i].(*emptyPostingsIterator); ok {
// An empty postings iterator means the entire AND is empty.
oTFR.iterators[i] = anEmptyPostingsIterator
continue OUTER
}
itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
if !ok {
// We only optimize postings iterators that support this operation.
return nil, nil
}
// If the postings iterator is "1-hit" optimized, then we
// can perform several optimizations up-front here.
docNum1Hit, ok := itr.DocNum1Hit()
if ok {
if docNum1HitLastOk && docNum1HitLast != docNum1Hit {
// The docNum1Hit doesn't match the previous
// docNum1HitLast, so the entire AND is empty.
oTFR.iterators[i] = anEmptyPostingsIterator
continue OUTER
}
docNum1HitLast = docNum1Hit
docNum1HitLastOk = true
continue
}
if itr.ActualBitmap() == nil {
// An empty actual bitmap means the entire AND is empty.
oTFR.iterators[i] = anEmptyPostingsIterator
continue OUTER
}
// Collect the actual bitmap for more processing later.
actualBMs = append(actualBMs, itr.ActualBitmap())
}
if docNum1HitLastOk {
// We reach here if all the 1-hit optimized posting
// iterators had the same 1-hit docNum, so we can check if
// our collected actual bitmaps also have that docNum.
for _, bm := range actualBMs {
if !bm.Contains(uint32(docNum1HitLast)) {
// The docNum1Hit isn't in one of our actual
// bitmaps, so the entire AND is empty.
oTFR.iterators[i] = anEmptyPostingsIterator
continue OUTER
}
}
// The actual bitmaps and docNum1Hits all contain or have
// the same 1-hit docNum, so that's our AND'ed result.
oTFR.iterators[i] = newUnadornedPostingsIteratorFrom1Hit(docNum1HitLast)
continue OUTER
}
if len(actualBMs) == 0 {
// If we've collected no actual bitmaps at this point,
// then the entire AND is empty.
oTFR.iterators[i] = anEmptyPostingsIterator
continue OUTER
}
if len(actualBMs) == 1 {
// If we've only 1 actual bitmap, then that's our result.
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(actualBMs[0])
continue OUTER
}
// Else, AND together our collected bitmaps as our result.
// roaring.And allocates a new bitmap, so the inputs are left untouched
// by the in-place And calls that follow.
bm := roaring.And(actualBMs[0], actualBMs[1])
for _, actualBM := range actualBMs[2:] {
bm.And(actualBM)
}
oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(bm)
}
atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
return oTFR, nil
}
// ----------------------------------------------------------------
// optimizeDisjunctionUnadorned adds s to an unadorned disjunction optimization
// context. An "unadorned" disjunction optimization is appropriate when
// additional or subsidiary information like freq-norm's and term-vectors are
// not required, and instead only the internal-id's are needed.
//
// A nil octx starts a new OptimizeTFRDisjunctionUnadorned bound to s's
// snapshot. A context of any other concrete type yields (nil, nil). An error
// is returned when the context belongs to a different snapshot than s.
func (s *IndexSnapshotTermFieldReader) optimizeDisjunctionUnadorned(
	octx index.OptimizableContext) (index.OptimizableContext, error) {
	var disj *OptimizeTFRDisjunctionUnadorned

	switch existing := octx.(type) {
	case nil:
		disj = &OptimizeTFRDisjunctionUnadorned{
			snapshot: s.snapshot,
		}
	case *OptimizeTFRDisjunctionUnadorned:
		disj = existing
	default:
		return nil, nil
	}

	if disj.snapshot != s.snapshot {
		return nil, fmt.Errorf("tried to optimize unadorned disjunction across different snapshots")
	}

	disj.tfrs = append(disj.tfrs, s)
	return disj, nil
}
// OptimizeTFRDisjunctionUnadorned collects the term field readers of one
// snapshot that participate in an unadorned disjunction optimization.
type OptimizeTFRDisjunctionUnadorned struct {
// snapshot is the index snapshot every collected reader must belong to.
snapshot *IndexSnapshot
// tfrs are the readers registered so far, in registration order.
tfrs []*IndexSnapshotTermFieldReader
}
// OptimizeTFRDisjunctionUnadornedTerm is the artificial term given to the
// reader produced by OptimizeTFRDisjunctionUnadorned.Finish.
var OptimizeTFRDisjunctionUnadornedTerm = []byte("")
// OptimizeTFRDisjunctionUnadornedField is the artificial field given to the
// reader produced by OptimizeTFRDisjunctionUnadorned.Finish.
var OptimizeTFRDisjunctionUnadornedField = "*"
// Finish of an unadorned disjunction optimization will compute a
// termFieldReader with an "actual" bitmap that represents the
// constituent bitmaps OR'ed together. This termFieldReader cannot
// provide any freq-norm or termVector associated information.
//
// It returns (nil, nil) when fewer than two readers were registered, or when
// any constituent iterator does not implement
// segment.OptimizablePostingsIterator. Otherwise it returns the new reader
// and bumps the TotTermSearchersStarted stat by one.
func (o *OptimizeTFRDisjunctionUnadorned) Finish() (rv index.Optimized, err error) {
	if len(o.tfrs) <= 1 {
		return nil, nil
	}

	// Bail out before allocating anything if some iterator cannot be
	// optimized. Only the type check is needed here; bitmap cardinalities
	// are not used by this optimization.
	for i := range o.snapshot.segment {
		for _, tfr := range o.tfrs {
			if _, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator); !ok {
				return nil, nil
			}
		}
	}

	// We use an artificial term and field because the optimized
	// termFieldReader can represent multiple terms and fields.
	oTFR := o.snapshot.unadornedTermFieldReader(
		OptimizeTFRDisjunctionUnadornedTerm, OptimizeTFRDisjunctionUnadornedField)

	var docNums []uint32            // Collected docNum's from 1-hit posting lists.
	var actualBMs []*roaring.Bitmap // Collected from regular posting lists.

	for i := range o.snapshot.segment {
		// Reuse the backing arrays across segments.
		docNums = docNums[:0]
		actualBMs = actualBMs[:0]

		for _, tfr := range o.tfrs {
			itr, ok := tfr.iterators[i].(segment.OptimizablePostingsIterator)
			if !ok {
				return nil, nil
			}

			if docNum, ok := itr.DocNum1Hit(); ok {
				docNums = append(docNums, uint32(docNum))
				continue
			}

			if bm := itr.ActualBitmap(); bm != nil {
				actualBMs = append(actualBMs, bm)
			}
		}

		// Always build a fresh bitmap so that adding the 1-hit doc numbers
		// below never mutates a constituent iterator's bitmap.
		var bm *roaring.Bitmap
		switch len(actualBMs) {
		case 0:
			bm = roaring.New()
		case 1:
			bm = actualBMs[0].Clone()
		case 2:
			bm = roaring.Or(actualBMs[0], actualBMs[1])
		default:
			bm = roaring.HeapOr(actualBMs...)
		}

		bm.AddMany(docNums)

		oTFR.iterators[i] = newUnadornedPostingsIteratorFromBitmap(bm)
	}

	atomic.AddUint64(&o.snapshot.parent.stats.TotTermSearchersStarted, uint64(1))
	return oTFR, nil
}
// ----------------------------------------------------------------
// unadornedTermFieldReader builds an IndexSnapshotTermFieldReader for term and
// field on this snapshot with one (initially nil) postings iterator slot per
// segment. The reader is flagged unadorned, starts at segment offset 0, and
// has freq, norm, term-vector, recycling and bytes-read tracking disabled.
func (i *IndexSnapshot) unadornedTermFieldReader(
	term []byte, field string) *IndexSnapshotTermFieldReader {
	perSegment := make([]segment.PostingsIterator, len(i.segment))

	// This IndexSnapshotTermFieldReader will not be recycled, more
	// conversation here: https://github.com/blevesearch/bleve/pull/1438
	//
	// segmentOffset, includeFreq, includeNorm, includeTermVectors, recycle
	// and updateBytesRead (unadorned TFRs do not require bytes read
	// tracking) are deliberately left at their zero values.
	tfr := &IndexSnapshotTermFieldReader{
		snapshot:  i,
		term:      term,
		field:     field,
		iterators: perSegment,
		// signal downstream that this is a special unadorned termFieldReader
		unadorned: true,
	}
	return tfr
}
================================================
FILE: index/scorch/optimize_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package scorch
import (
"context"
"fmt"
"sync"
"sync/atomic"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
segment_api "github.com/blevesearch/scorch_segment_api/v2"
)
// OptimizeVR collects the vector readers of one snapshot so that their kNN
// searches can be executed together in Finish.
type OptimizeVR struct {
// ctx is the context of the most recent VectorOptimize call; it is consulted
// for the searcher start/end callbacks.
ctx context.Context
// snapshot is the index snapshot every collected reader must belong to.
snapshot *IndexSnapshot
// totalCost accumulates the vector index sizes reported to the searcher
// start callback; it is what the end callback is invoked with.
totalCost uint64
// maps field to vector readers
vrs map[string][]*IndexSnapshotVectorReader
}
// BleveMaxKNNConcurrency is the capacity of the semaphore that bounds how many
// per-segment kNN goroutines OptimizeVR.Finish runs at the same time.
// This setting _MUST_ only be changed during init and not after.
var BleveMaxKNNConcurrency = 10
// invokeSearcherEndCallback calls the search.SearcherEndCallbackFn stored in
// o.ctx under search.SearcherEndCallbackKey, passing o.totalCost. It does
// nothing when there is no context, no callback of that type, or when the
// total cost is zero. The callback's return value is ignored.
func (o *OptimizeVR) invokeSearcherEndCallback() {
	if o.ctx == nil {
		return
	}

	cb := o.ctx.Value(search.SearcherEndCallbackKey)
	if cb == nil {
		return
	}

	endFn, ok := cb.(search.SearcherEndCallbackFn)
	if !ok || o.totalCost == 0 {
		return
	}

	// notify the callback that the searcher creation etc. is finished
	// and report back the total cost for it to track and take actions
	// appropriately.
	_ = endFn(o.totalCost)
}
// Finish runs the kNN search of every collected vector reader against every
// segment of the snapshot that implements segment_api.VectorSegment. One
// goroutine is started per such segment, with at most BleveMaxKNNConcurrency
// running at once. Results are stored into each reader's postings and
// iterators slot for that segment. The searcher end callback is invoked when
// Finish returns. It returns the first recorded error, or nil.
func (o *OptimizeVR) Finish() error {
// for each field, get the vector index --> invoke the zap func.
// for each VR, populate postings list and iterators
// by passing the obtained vector index and getting similar vectors.
// defer close index - just once.
// errors is appended to from multiple goroutines; errorsM guards it.
var errorsM sync.Mutex
var errors []error
defer o.invokeSearcherEndCallback()
wg := sync.WaitGroup{}
semaphore := make(chan struct{}, BleveMaxKNNConcurrency)
// Launch goroutines to get vector index for each segment
for i, seg := range o.snapshot.segment {
if sv, ok := seg.segment.(segment_api.VectorSegment); ok {
wg.Add(1)
// Blocks the launching loop while the maximum number of goroutines is running.
semaphore <- struct{}{} // Acquire a semaphore slot
go func(index int, segment segment_api.VectorSegment, origSeg *SegmentSnapshot) {
defer func() {
<-semaphore // Release the semaphore slot
wg.Done()
}()
for field, vrs := range o.vrs {
// Early exit if the field is supposed to be completely deleted or
// if it's index data has been deleted
if info, ok := o.snapshot.updatedFields[field]; ok && (info.Deleted || info.Index) {
continue
}
vecIndex, err := segment.InterpretVectorIndex(field, origSeg.deleted)
if err != nil {
errorsM.Lock()
errors = append(errors, err)
errorsM.Unlock()
// Abandon this segment's remaining fields once one of them fails.
return
}
// update the vector index size as a meta value in the segment snapshot
vectorIndexSize := vecIndex.Size()
origSeg.cachedMeta.updateMeta(field, vectorIndexSize)
for _, vr := range vrs {
var pl segment_api.VecPostingsList
var err error
// for each VR, populate postings list and iterators
// by passing the obtained vector index and getting similar vectors.
// check if the vector reader is configured to use a pre-filter
// to filter out ineligible documents before performing
// kNN search.
if vr.eligibleSelector != nil {
pl, err = vecIndex.SearchWithFilter(vr.vector, vr.k,
vr.eligibleSelector.SegmentEligibleDocuments(index), vr.searchParams)
} else {
pl, err = vecIndex.Search(vr.vector, vr.k, vr.searchParams)
}
if err != nil {
errorsM.Lock()
errors = append(errors, err)
errorsM.Unlock()
// The vector index is closed on a separate goroutine that is not waited on.
go vecIndex.Close()
return
}
atomic.AddUint64(&o.snapshot.parent.stats.TotKNNSearches, uint64(1))
// postings and iterators are already alloc'ed when
// IndexSnapshotVectorReader is created
vr.postings[index] = pl
vr.iterators[index] = pl.Iterator(vr.iterators[index])
}
// Done with this field's vector index; close it on a separate goroutine.
go vecIndex.Close()
}
}(i, sv, seg)
}
}
wg.Wait()
close(semaphore)
// Only the first recorded error is reported to the caller.
if len(errors) > 0 {
return errors[0]
}
return nil
}
// VectorOptimize registers the vector reader s with a kNN optimization
// context. A nil octx starts a new OptimizeVR bound to s's snapshot; a context
// of any other concrete type is returned unchanged.
//
// It returns an error when the segment plugin version is older than
// VectorSearchSupportedSegmentVersion, when the context belongs to a different
// snapshot, or when the searcher start callback found in ctx rejects the
// summed cached vector index size for s.field. In the latter two cases the
// searcher end callback is invoked before returning.
func (s *IndexSnapshotVectorReader) VectorOptimize(ctx context.Context,
octx index.VectorOptimizableContext,
) (index.VectorOptimizableContext, error) {
if s.snapshot.parent.segPlugin.Version() < VectorSearchSupportedSegmentVersion {
return nil, fmt.Errorf("vector search not supported for this index, "+
"index's segment version %v, supported segment version for vector search %v",
s.snapshot.parent.segPlugin.Version(), VectorSearchSupportedSegmentVersion)
}
if octx == nil {
octx = &OptimizeVR{
snapshot: s.snapshot,
vrs: make(map[string][]*IndexSnapshotVectorReader),
}
}
o, ok := octx.(*OptimizeVR)
if !ok {
return octx, nil
}
// Remember the latest context so the callbacks can be looked up from it.
o.ctx = ctx
if o.snapshot != s.snapshot {
o.invokeSearcherEndCallback()
return nil, fmt.Errorf("tried to optimize KNN across different snapshots")
}
// for every searcher creation, consult the segment snapshot to see
// what's the vector index size and since you're anyways going
// to use this vector index to perform the search etc. as part of the Finish()
// perform a check as to whether we allow the searcher creation (the downstream)
// Finish() logic to even occur or not.
var sumVectorIndexSize uint64
for _, seg := range o.snapshot.segment {
vecIndexSize := seg.cachedMeta.fetchMeta(s.field)
if vecIndexSize != nil {
// The unchecked assertion panics if the cached value is not a uint64.
sumVectorIndexSize += vecIndexSize.(uint64)
}
}
if o.ctx != nil {
if cb := o.ctx.Value(search.SearcherStartCallbackKey); cb != nil {
if cbF, ok := cb.(search.SearcherStartCallbackFn); ok {
err := cbF(sumVectorIndexSize)
if err != nil {
// it's important to invoke the end callback at this point since
// if the earlier searchers of this optimize struct were successful
// the cost corresponding to it would be incremented and if the
// current searcher fails the check then we end up erroring out
// the overall optimized searcher creation, the cost needs to be
// handled appropriately.
o.invokeSearcherEndCallback()
return nil, err
}
}
}
}
// total cost is essentially the sum of the vector indexes' size across all the
// searchers - all of them end up reading and maintaining a vector index.
// misaccounting this value would end up calling the "end" callback with a value
// not equal to the value passed to "start" callback.
o.totalCost += sumVectorIndexSize
o.vrs[s.field] = append(o.vrs[s.field], s)
return o, nil
}
================================================
FILE: index/scorch/persister.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"bytes"
"encoding/binary"
"encoding/json"
"fmt"
"io"
"log"
"math"
"os"
"path/filepath"
"slices"
"sort"
"strconv"
"strings"
"sync/atomic"
"time"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
bolt "go.etcd.io/bbolt"
)
// persister is the source label passed to NewScorchError for asynchronous
// errors raised by the persister loop.
const persister = "persister"
// DefaultPersisterNapTimeMSec is kept at zero, as this helps in direct
// persistence of segments with the default safe batch option.
// If the default safe batch option results in a high number of
// files on disk, then users may initialise this configuration parameter
// with higher values so that the persister will nap a bit within its
// work loop to favour better in-memory merging of segments, resulting
// in fewer segment files on disk. But that may come with an indexing
// performance overhead.
// Unsafe batch users are advised to override this to a higher value
// for better performance, especially with high data density.
var DefaultPersisterNapTimeMSec int = 0 // ms
// DefaultPersisterNapUnderNumFiles helps in controlling the pace of the
// persister. At times of slow merger progress with heavy file merging
// operations, it's better to slow down the persister to let the merger
// catch up within a range defined by this parameter.
// Fewer files on disk (as per the merge plan) would result in keeping the
// file handle usage under limit, faster disk merger and a healthier index.
// It's been observed that such a loosely sync'ed introducer-persister-merger
// trio results in better overall performance.
var DefaultPersisterNapUnderNumFiles int = 1000
// DefaultMemoryPressurePauseThreshold is set to the largest uint64 value.
// NOTE(review): presumably the default for
// persisterOptions.MemoryPressurePauseThreshold — confirm against
// parsePersisterOptions.
var DefaultMemoryPressurePauseThreshold uint64 = math.MaxUint64
// persisterOptions holds the tunables of the persister loop; the loop obtains
// them from parsePersisterOptions when it starts.
type persisterOptions struct {
// PersisterNapTimeMSec controls the wait/delay injected into
// persistence workloop to improve the chances for
// a healthier and heavier in-memory merging
PersisterNapTimeMSec int
// If PersisterNapTimeMSec > 0 and the number of files is less than
// PersisterNapUnderNumFiles, then the persister will sleep
// PersisterNapTimeMSec amount of time to improve the chances for
// a healthier and heavier in-memory merging
PersisterNapUnderNumFiles int
// MemoryPressurePauseThreshold gives the persister better leeway
// for prudently performing the memory merge of segments in a memory
// pressure situation. Here the config value is an upper threshold
// for the number of paused application threads. The default value would
// be a very high number to always favour the merging of memory segments.
MemoryPressurePauseThreshold uint64
// NumPersisterWorkers decides the number of parallel workers that will
// perform the in-memory merge of segments followed by a flush operation.
NumPersisterWorkers int
// MaxSizeInMemoryMergePerWorker is the maximum size of data that a single
// persister worker is allowed to work on
MaxSizeInMemoryMergePerWorker int
}
// notificationChan is a channel of empty structs, i.e. it carries no payload.
type notificationChan chan struct{}
// persisterLoop is the main loop of the persister. On every iteration it
// picks up a pending watcher from s.persisterNotifier (if any), possibly
// pauses via pausePersisterForMergerCatchUp, and, when the root snapshot's
// epoch is newer than the last persisted epoch, persists that snapshot.
// It then registers an epochWatcher on s.introducerNotifier and blocks until
// that watcher is notified, another watcher arrives on s.persisterNotifier,
// or s.closeCh fires.
//
// A panic is recovered and reported through fireAsyncError, and
// s.asyncTasks.Done() is always called when the loop exits. The loop exits
// early, without iterating, if the persister options cannot be parsed.
func (s *Scorch) persisterLoop() {
defer func() {
if r := recover(); r != nil {
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("panic: %v, path: %s", r, s.path),
ErrAsyncPanic,
))
}
s.asyncTasks.Done()
}()
// watchers received on s.persisterNotifier that have not been released yet
var persistWatchers []*epochWatcher
var lastPersistedEpoch, lastMergedEpoch uint64
// most recently received watcher; kept across iterations
var ew *epochWatcher
// callbacks of snapshots whose persist attempt failed, carried over to the
// next successful persist
var unpersistedCallbacks []index.BatchCallback
po, err := s.parsePersisterOptions()
if err != nil {
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("persisterOptions json parsing err: %v", err),
ErrOptionsParse,
))
return
}
OUTER:
for {
atomic.AddUint64(&s.stats.TotPersistLoopBeg, 1)
// non-blocking check for shutdown or a newly arrived watcher
select {
case <-s.closeCh:
break OUTER
case ew = <-s.persisterNotifier:
persistWatchers = append(persistWatchers, ew)
default:
}
if ew != nil && ew.epoch > lastMergedEpoch {
lastMergedEpoch = ew.epoch
}
lastMergedEpoch, persistWatchers = s.pausePersisterForMergerCatchUp(lastPersistedEpoch,
lastMergedEpoch, persistWatchers, po)
var ourSnapshot *IndexSnapshot
var ourPersisted []chan error
var ourPersistedCallbacks []index.BatchCallback
// check to see if there is a new snapshot to persist
s.rootLock.Lock()
if s.root != nil && s.root.epoch > lastPersistedEpoch {
ourSnapshot = s.root
// hold a reference for the duration of the persist; released by one of
// the DecRef calls below
ourSnapshot.AddRef()
// take ownership of the waiters and callbacks registered so far
ourPersisted = s.rootPersisted
s.rootPersisted = nil
ourPersistedCallbacks = s.persistedCallbacks
s.persistedCallbacks = nil
atomic.StoreUint64(&s.iStats.persistSnapshotSize, uint64(ourSnapshot.Size()))
atomic.StoreUint64(&s.iStats.persistEpoch, ourSnapshot.epoch)
}
s.rootLock.Unlock()
if ourSnapshot != nil {
startTime := time.Now()
err := s.persistSnapshot(ourSnapshot, po)
// report the outcome to every waiter: the error (if any) is sent, then
// the channel is closed
for _, ch := range ourPersisted {
if err != nil {
ch <- err
}
close(ch)
}
if err != nil {
// the persist failed, so reset the recorded persist epoch to 0
atomic.StoreUint64(&s.iStats.persistEpoch, 0)
if err == segment.ErrClosed {
// index has been closed
_ = ourSnapshot.DecRef()
break OUTER
}
// save this current snapshot's persistedCallbacks, to invoke during
// the retry attempt
unpersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
s.fireAsyncError(NewScorchError(
persister,
fmt.Sprintf("got err persisting snapshot: %v", err),
ErrPersist,
))
_ = ourSnapshot.DecRef()
atomic.AddUint64(&s.stats.TotPersistLoopErr, 1)
continue OUTER
}
if unpersistedCallbacks != nil {
// in the event of this being a retry attempt for persisting a snapshot
// that had earlier failed, prepend the persistedCallbacks associated
// with earlier segment(s) to the latest persistedCallbacks
ourPersistedCallbacks = append(unpersistedCallbacks, ourPersistedCallbacks...)
unpersistedCallbacks = nil
}
// err is nil on this path, so the callbacks are invoked with a nil error
for i := range ourPersistedCallbacks {
ourPersistedCallbacks[i](err)
}
atomic.StoreUint64(&s.stats.LastPersistedEpoch, ourSnapshot.epoch)
lastPersistedEpoch = ourSnapshot.epoch
// a snapshot has been persisted, release every pending watcher
for _, ew := range persistWatchers {
close(ew.notifyCh)
}
persistWatchers = nil
_ = ourSnapshot.DecRef()
// if the root has a different epoch than the one just persisted, loop
// again right away instead of waiting for a notification
changed := false
s.rootLock.RLock()
if s.root != nil && s.root.epoch != lastPersistedEpoch {
changed = true
}
s.rootLock.RUnlock()
s.fireEvent(EventKindPersisterProgress, time.Since(startTime))
if changed {
atomic.AddUint64(&s.stats.TotPersistLoopProgress, 1)
continue OUTER
}
}
// tell the introducer we're waiting for changes
w := &epochWatcher{
epoch: lastPersistedEpoch,
notifyCh: make(notificationChan, 1),
}
select {
case <-s.closeCh:
break OUTER
case s.introducerNotifier <- w:
}
if ok := s.fireEvent(EventKindPurgerCheck, 0); ok {
s.removeOldData() // might as well cleanup while waiting
}
atomic.AddUint64(&s.stats.TotPersistLoopWait, 1)
select {
case <-s.closeCh:
break OUTER
case <-w.notifyCh:
// woken up, next loop should pick up work
atomic.AddUint64(&s.stats.TotPersistLoopWaitNotified, 1)
case ew = <-s.persisterNotifier:
// if the watchers are already caught up then let them wait,
// else let them continue to do the catch up
persistWatchers = append(persistWatchers, ew)
}
atomic.AddUint64(&s.stats.TotPersistLoopEnd, 1)
}
}
// notifyMergeWatchers releases every watcher whose epoch is strictly older
// than lastPersistedEpoch by closing its notifyCh. It returns the watchers
// that still have to wait, in their original order; the result is nil when
// no watcher remains.
func notifyMergeWatchers(lastPersistedEpoch uint64,
	persistWatchers []*epochWatcher,
) []*epochWatcher {
	var stillWaiting []*epochWatcher
	for i := range persistWatchers {
		watcher := persistWatchers[i]
		if watcher.epoch >= lastPersistedEpoch {
			// not persisted yet from this watcher's point of view
			stillWaiting = append(stillWaiting, watcher)
			continue
		}
		close(watcher.notifyCh)
	}
	return stillWaiting
}
// pausePersisterForMergerCatchUp optionally delays the persister before the
// next persist, based on the number of segment files reported by
// diskFileStats:
//
//   - with fewer files than po.PersisterNapUnderNumFiles (and a positive
//     po.PersisterNapTimeMSec and no blocking events), it naps for up to
//     PersisterNapTimeMSec, waking early on close or on a watcher arriving
//     on s.persisterNotifier;
//   - otherwise, while the file count is at or above the threshold and
//     lastMergedEpoch is behind lastPersistedEpoch, it blocks waiting for
//     watchers on s.persisterNotifier (or for close).
//
// Watchers whose epoch is older than lastPersistedEpoch are released along
// the way. It returns the possibly updated lastMergedEpoch and the watchers
// that are still pending.
func (s *Scorch) pausePersisterForMergerCatchUp(lastPersistedEpoch uint64,
lastMergedEpoch uint64, persistWatchers []*epochWatcher,
po *persisterOptions,
) (uint64, []*epochWatcher) {
// First, let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
// Check the merger lag by counting the segment files on disk,
numFilesOnDisk, _, _ := s.diskFileStats(nil)
// On finding fewer files on disk, persister takes a short pause
// for sufficient in-memory segments to pile up for the next
// memory merge cum persist loop.
// NOTE(review): uint64(po.PersisterNapUnderNumFiles) wraps to a very large
// value for a negative setting - confirm negative values are not expected.
if numFilesOnDisk < uint64(po.PersisterNapUnderNumFiles) &&
po.PersisterNapTimeMSec > 0 && s.NumEventsBlocking() == 0 {
select {
case <-s.closeCh:
case <-time.After(time.Millisecond * time.Duration(po.PersisterNapTimeMSec)):
atomic.AddUint64(&s.stats.TotPersisterNapPauseCompleted, 1)
case ew := <-s.persisterNotifier:
// unblock the merger in meantime
persistWatchers = append(persistWatchers, ew)
lastMergedEpoch = ew.epoch
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
atomic.AddUint64(&s.stats.TotPersisterMergerNapBreak, 1)
}
return lastMergedEpoch, persistWatchers
}
// Finding too many files on disk could be due to two reasons.
// 1. Too many older snapshots awaiting the clean up.
// 2. The merger could be lagging behind on merging the disk files.
if numFilesOnDisk > uint64(po.PersisterNapUnderNumFiles) {
if ok := s.fireEvent(EventKindPurgerCheck, 0); ok {
s.removeOldData()
}
// re-count after the cleanup attempt
numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
// Persister pause until the merger catches up to reduce the segment
// file count under the threshold.
// But if there is memory pressure, then skip this sleep maneuvers.
OUTER:
for po.PersisterNapUnderNumFiles > 0 &&
numFilesOnDisk >= uint64(po.PersisterNapUnderNumFiles) &&
lastMergedEpoch < lastPersistedEpoch {
atomic.AddUint64(&s.stats.TotPersisterSlowMergerPause, 1)
// block until the index is closed or a watcher arrives
select {
case <-s.closeCh:
break OUTER
case ew := <-s.persisterNotifier:
persistWatchers = append(persistWatchers, ew)
lastMergedEpoch = ew.epoch
}
atomic.AddUint64(&s.stats.TotPersisterSlowMergerResume, 1)
// let the watchers proceed if they lag behind
persistWatchers = notifyMergeWatchers(lastPersistedEpoch, persistWatchers)
numFilesOnDisk, _, _ = s.diskFileStats(nil)
}
return lastMergedEpoch, persistWatchers
}
// parsePersisterOptions builds the persister options for this index. It
// starts from the package-level defaults and, when the config carries a
// "scorchPersisterOptions" entry, overlays that entry by round-tripping it
// through JSON. On a marshal or unmarshal failure the error is returned
// together with the options as populated so far.
func (s *Scorch) parsePersisterOptions() (*persisterOptions, error) {
	opts := &persisterOptions{
		PersisterNapTimeMSec:          DefaultPersisterNapTimeMSec,
		PersisterNapUnderNumFiles:     DefaultPersisterNapUnderNumFiles,
		MemoryPressurePauseThreshold:  DefaultMemoryPressurePauseThreshold,
		NumPersisterWorkers:           DefaultNumPersisterWorkers,
		MaxSizeInMemoryMergePerWorker: DefaultMaxSizeInMemoryMergePerWorker,
	}

	raw, configured := s.config["scorchPersisterOptions"]
	if !configured {
		return opts, nil
	}

	encoded, err := util.MarshalJSON(raw)
	if err != nil {
		return opts, err
	}
	if err = util.UnmarshalJSON(encoded, opts); err != nil {
		return opts, err
	}
	return opts, nil
}
// persistSnapshot persists the given snapshot. While the number of blocking
// events is below po.MemoryPressurePauseThreshold it first tries the
// in-memory merge path (persistSnapshotMaybeMerge); if that path is skipped
// or reports that it did not persist anything, the snapshot's segments are
// persisted directly.
func (s *Scorch) persistSnapshot(snapshot *IndexSnapshot,
	po *persisterOptions,
) error {
	underMemoryPressure := s.NumEventsBlocking() >= po.MemoryPressurePauseThreshold
	if !underMemoryPressure {
		done, err := s.persistSnapshotMaybeMerge(snapshot, po)
		switch {
		case err != nil:
			return err
		case done:
			return nil
		}
	}
	return s.persistSnapshotDirect(snapshot, nil)
}
// DefaultMinSegmentsForInMemoryMerge is the minimum number of in-memory
// (not yet persisted) segments that persistSnapshotMaybeMerge() needs to
// see in an IndexSnapshot before it decides to merge and persist those
// segments; with fewer, it reports that nothing was persisted.
var DefaultMinSegmentsForInMemoryMerge = 2
// flushable is one batch of in-memory segments that is handed to
// mergeAndPersistInMemorySegments as a unit.
type flushable struct {
// segments holds the in-memory segments that make up this batch
segments []segment.Segment
// drops holds, per entry of segments, the deleted bitmap taken from the
// corresponding segment snapshot
drops []*roaring.Bitmap
// sbIdxs holds, per entry of segments, its index in the source
// IndexSnapshot's segment slice
sbIdxs []int
// totDocs is the sum of Count() over segments
totDocs uint64
}
// DefaultNumPersisterWorkers is the default number of workers that, in
// parallel, perform an in-memory merge of the segments followed by a
// flush operation.
var DefaultNumPersisterWorkers = 1
// DefaultMaxSizeInMemoryMergePerWorker is the default maximum size of data
// on which a single worker is allowed to perform the in-memory merge
// operation. The value 0, combined with a single persister worker, selects
// the legacy one-shot merge + flush behaviour (see legacyFlushBehaviour).
var DefaultMaxSizeInMemoryMergePerWorker = 0
// legacyFlushBehaviour reports whether the persister should use the legacy
// one-shot in-memory merge + flush of all in-memory segments. That is the
// case only for exactly one persister worker together with the special
// per-worker size limit of 0.
func legacyFlushBehaviour(maxSizeInMemoryMergePerWorker, numPersisterWorkers int) bool {
	if numPersisterWorkers != 1 {
		return false
	}
	return maxSizeInMemoryMergePerWorker == 0
}
// persistSnapshotMaybeMerge examines the snapshot and might merge and
// persist the in-memory zap segments if there are enough of them.
//
// The in-memory segments (those that are not a segment.PersistedSegment) are
// grouped into flushable batches, merged and persisted through
// mergeAndPersistInMemorySegments, and then a snapshot equivalent to the
// input, with the merged segments replaced by the new ones, is persisted
// with persistSnapshotDirect.
//
// It returns (true, nil) when the snapshot was persisted this way,
// (false, nil) when nothing was done (fewer than
// DefaultMinSegmentsForInMemoryMerge segments selected, or no new snapshot
// came back from the merge), and (false, err) on failure.
func (s *Scorch) persistSnapshotMaybeMerge(snapshot *IndexSnapshot, po *persisterOptions) (
bool, error) {
// collect the in-memory zap segments (SegmentBase instances)
var sbs []segment.Segment
var sbsDrops []*roaring.Bitmap
var sbsIndexes []int
// indexes (into snapshot.segment) of every segment placed in a batch
var oldSegIdxs []int
flushSet := make([]*flushable, 0)
var totSize int
var numSegsToFlushOut int
var totDocs uint64
// legacy behaviour of merge + flush of all in-memory segments in one-shot
if legacyFlushBehaviour(po.MaxSizeInMemoryMergePerWorker, po.NumPersisterWorkers) {
val := &flushable{
segments: make([]segment.Segment, 0),
drops: make([]*roaring.Bitmap, 0),
sbIdxs: make([]int, 0),
totDocs: totDocs,
}
// note: the loop variable shadows the snapshot parameter inside the loop
for i, snapshot := range snapshot.segment {
if _, ok := snapshot.segment.(segment.PersistedSegment); !ok {
val.segments = append(val.segments, snapshot.segment)
val.drops = append(val.drops, snapshot.deleted)
val.sbIdxs = append(val.sbIdxs, i)
oldSegIdxs = append(oldSegIdxs, i)
val.totDocs += snapshot.segment.Count()
numSegsToFlushOut++
}
}
flushSet = append(flushSet, val)
} else {
// constructs a flushSet where each flushable object contains a set of segments
// to be merged and flushed out to disk.
for i, snapshot := range snapshot.segment {
// close the current batch once it is large enough and has enough segments
if totSize >= po.MaxSizeInMemoryMergePerWorker &&
len(sbs) >= DefaultMinSegmentsForInMemoryMerge {
numSegsToFlushOut += len(sbs)
val := &flushable{
segments: slices.Clone(sbs),
drops: slices.Clone(sbsDrops),
sbIdxs: slices.Clone(sbsIndexes),
totDocs: totDocs,
}
flushSet = append(flushSet, val)
oldSegIdxs = append(oldSegIdxs, sbsIndexes...)
// the batch holds clones, so the scratch slices can be reused
sbs, sbsDrops, sbsIndexes = sbs[:0], sbsDrops[:0], sbsIndexes[:0]
totSize, totDocs = 0, 0
}
// one batch per worker at most
if len(flushSet) >= int(po.NumPersisterWorkers) {
break
}
if _, ok := snapshot.segment.(segment.PersistedSegment); !ok {
sbs = append(sbs, snapshot.segment)
sbsDrops = append(sbsDrops, snapshot.deleted)
sbsIndexes = append(sbsIndexes, i)
totDocs += snapshot.segment.Count()
totSize += snapshot.segment.Size()
}
}
// if there were too few segments just merge them all as part of a single worker
// NOTE(review): sbs can be empty here (e.g. when the previous batch was
// closed and only persisted segments followed) - confirm that
// mergeAndPersistInMemorySegments tolerates a flushable with no segments.
if len(flushSet) < po.NumPersisterWorkers {
numSegsToFlushOut += len(sbs)
val := &flushable{
segments: slices.Clone(sbs),
drops: slices.Clone(sbsDrops),
sbIdxs: slices.Clone(sbsIndexes),
totDocs: totDocs,
}
flushSet = append(flushSet, val)
oldSegIdxs = append(oldSegIdxs, sbsIndexes...)
}
}
if numSegsToFlushOut < DefaultMinSegmentsForInMemoryMerge {
return false, nil
}
// the newSnapshot at this point would contain the newly created file segments
// and updated with the root.
newSnapshot, newSegmentIDs, err := s.mergeAndPersistInMemorySegments(snapshot, flushSet)
if err != nil {
return false, err
}
if newSnapshot == nil {
return false, nil
}
defer func() {
_ = newSnapshot.DecRef()
}()
// ids of the input segments that took part in a merge
mergedSegmentIDs := map[uint64]struct{}{}
for _, idx := range oldSegIdxs {
mergedSegmentIDs[snapshot.segment[idx].id] = struct{}{}
}
// ids of the segments produced by the merge
newMergedSegmentIDs := make(map[uint64]struct{}, len(newSegmentIDs))
for _, id := range newSegmentIDs {
newMergedSegmentIDs[id] = struct{}{}
}
// construct a snapshot that's logically equivalent to the input
// snapshot, but with merged segments replaced by the new segment
equiv := &IndexSnapshot{
parent: snapshot.parent,
segment: make([]*SegmentSnapshot, 0, len(snapshot.segment)),
internal: snapshot.internal,
epoch: snapshot.epoch,
creator: "persistSnapshotMaybeMerge",
}
// to track which segments haven't participated in the in-memory merge
// they won't be flushed out to the disk yet, but in the next cycle will be
// merged in-memory and then flushed out - this is to keep the number of
// on-disk files in limit.
exclude := make(map[uint64]struct{})
// copy to the equiv the segments that weren't replaced
for _, segment := range snapshot.segment {
if _, wasMerged := mergedSegmentIDs[segment.id]; !wasMerged {
equiv.segment = append(equiv.segment, segment)
exclude[segment.id] = struct{}{}
}
}
// append to the equiv the newly merged segments
for _, segment := range newSnapshot.segment {
if _, ok := newMergedSegmentIDs[segment.id]; ok {
equiv.segment = append(equiv.segment, &SegmentSnapshot{
id: segment.id,
segment: segment.segment,
deleted: nil, // nil since merging handled deletions
stats: nil,
})
}
}
err = s.persistSnapshotDirect(equiv, exclude)
if err != nil {
return false, err
}
return true, nil
}
// copyToDirectory copies the regular file at srcPath into the directory d,
// under "store/<base name of srcPath>". It returns the number of bytes
// copied. A nil d is a no-op that returns (0, nil).
//
// The destination writer is closed on every path once it has been obtained,
// including when the source cannot be stat'ed or opened, and an error from
// closing it is reported when the copy itself succeeded, so that a failed
// flush of the destination is not mistaken for a successful copy.
func copyToDirectory(srcPath string, d index.Directory) (n int64, err error) {
	if d == nil {
		return 0, nil
	}

	dest, err := d.GetWriter(filepath.Join("store", filepath.Base(srcPath)))
	if err != nil {
		return 0, fmt.Errorf("GetWriter err: %v", err)
	}
	defer func() {
		// surface the close error only when nothing else went wrong
		if cerr := dest.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	sourceFileStat, err := os.Stat(srcPath)
	if err != nil {
		return 0, err
	}
	if !sourceFileStat.Mode().IsRegular() {
		return 0, fmt.Errorf("%s is not a regular file", srcPath)
	}

	source, err := os.Open(srcPath)
	if err != nil {
		return 0, err
	}
	// read-only handle, its close error carries no information
	defer source.Close()

	return io.Copy(dest, source)
}
// persistToDirectory persists an in-memory segment. With a nil d the segment
// is persisted to path through seg.Persist. Otherwise the segment must
// implement io.WriterTo and is written into d under
// "store/<base name of path>".
//
// The writer is always closed; an error from closing it is returned when
// the write itself succeeded, so that a failed flush is not reported as a
// successful persist.
func persistToDirectory(seg segment.UnpersistedSegment, d index.Directory,
	path string,
) error {
	if d == nil {
		return seg.Persist(path)
	}

	sg, ok := seg.(io.WriterTo)
	if !ok {
		return fmt.Errorf("no io.WriterTo segment implementation found")
	}

	w, err := d.GetWriter(filepath.Join("store", filepath.Base(path)))
	if err != nil {
		return err
	}

	_, err = sg.WriteTo(w)
	if cerr := w.Close(); err == nil {
		// the write error, when present, takes precedence
		err = cerr
	}
	return err
}
// prepareBoltSnapshot writes the given snapshot into the bolt transaction tx:
// the meta-data bucket (segment type, segment version, timestamp), the
// internal key/values, and one bucket per segment (path, deleted bits, stats
// and updated field info).
//
// Segments that are already persisted are recorded by file name and, when d
// is non-nil, copied into d. In-memory segments are persisted under path (or
// written into d when d is non-nil) unless their id is in exclude.
//
// It returns the file names recorded for the snapshot's segments and a map
// from segment id to the path of each segment newly persisted by this call.
// The transaction is neither committed nor rolled back here.
func prepareBoltSnapshot(snapshot *IndexSnapshot, tx *bolt.Tx, path string,
segPlugin SegmentPlugin, exclude map[uint64]struct{}, d index.Directory) (
[]string, map[uint64]string, error) {
snapshotsBucket, err := tx.CreateBucketIfNotExists(util.BoltSnapshotsBucket)
if err != nil {
return nil, nil, err
}
// the snapshot's bucket is keyed by its encoded epoch
newSnapshotKey := encodeUvarintAscending(nil, snapshot.epoch)
snapshotBucket, err := snapshotsBucket.CreateBucketIfNotExists(newSnapshotKey)
if err != nil {
return nil, nil, err
}
// persist meta values
metaBucket, err := snapshotBucket.CreateBucketIfNotExists(util.BoltMetaDataKey)
if err != nil {
return nil, nil, err
}
err = metaBucket.Put(util.BoltMetaDataSegmentTypeKey, []byte(segPlugin.Type()))
if err != nil {
return nil, nil, err
}
// buf is binary.MaxVarintLen32 bytes long but PutUint32 only fills the
// first 4 bytes; the remaining byte stays zero and is stored as well
buf := make([]byte, binary.MaxVarintLen32)
binary.BigEndian.PutUint32(buf, segPlugin.Version())
err = metaBucket.Put(util.BoltMetaDataSegmentVersionKey, buf)
if err != nil {
return nil, nil, err
}
// Storing the timestamp at which the current indexSnapshot
// was persisted, useful when you want to spread the
// numSnapshotsToKeep reasonably better than consecutive
// epochs.
currTimeStamp := time.Now()
timeStampBinary, err := currTimeStamp.MarshalText()
if err != nil {
return nil, nil, err
}
err = metaBucket.Put(util.BoltMetaDataTimeStamp, timeStampBinary)
if err != nil {
return nil, nil, err
}
// persist internal values
internalBucket, err := snapshotBucket.CreateBucketIfNotExists(util.BoltInternalKey)
if err != nil {
return nil, nil, err
}
// TODO optimize writing these in order?
for k, v := range snapshot.internal {
err = internalBucket.Put([]byte(k), v)
if err != nil {
return nil, nil, err
}
}
// record the parent's TotBytesWrittenAtIndexTime stat alongside the
// internal values (little-endian, 8 bytes)
if snapshot.parent != nil {
val := make([]byte, 8)
bytesWritten := atomic.LoadUint64(&snapshot.parent.stats.TotBytesWrittenAtIndexTime)
binary.LittleEndian.PutUint64(val, bytesWritten)
err = internalBucket.Put(util.TotBytesWrittenKey, val)
if err != nil {
return nil, nil, err
}
}
filenames := make([]string, 0, len(snapshot.segment))
newSegmentPaths := make(map[uint64]string, len(snapshot.segment))
// first ensure that each segment in this snapshot has been persisted
for _, segmentSnapshot := range snapshot.segment {
// each segment gets its own bucket keyed by its encoded id
snapshotSegmentKey := encodeUvarintAscending(nil, segmentSnapshot.id)
snapshotSegmentBucket, err := snapshotBucket.CreateBucketIfNotExists(snapshotSegmentKey)
if err != nil {
return nil, nil, err
}
switch seg := segmentSnapshot.segment.(type) {
case segment.PersistedSegment:
segPath := seg.Path()
// no-op when d is nil
_, err = copyToDirectory(segPath, d)
if err != nil {
return nil, nil, fmt.Errorf("segment: %s copy err: %v", segPath, err)
}
filename := filepath.Base(segPath)
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename))
if err != nil {
return nil, nil, err
}
filenames = append(filenames, filename)
case segment.UnpersistedSegment:
// need to persist this to disk if its not part of exclude list (which
// restricts which in-memory segment to be persisted to disk)
// note: for an excluded segment no path is stored in its bucket
if _, ok := exclude[segmentSnapshot.id]; !ok {
filename := zapFileName(segmentSnapshot.id)
path := filepath.Join(path, filename)
err := persistToDirectory(seg, d, path)
if err != nil {
return nil, nil, fmt.Errorf("segment: %s persist err: %v", path, err)
}
newSegmentPaths[segmentSnapshot.id] = path
err = snapshotSegmentBucket.Put(util.BoltPathKey, []byte(filename))
if err != nil {
return nil, nil, err
}
filenames = append(filenames, filename)
}
default:
return nil, nil, fmt.Errorf("unknown segment type: %T", seg)
}
// store current deleted bits
var roaringBuf bytes.Buffer
if segmentSnapshot.deleted != nil {
_, err = segmentSnapshot.deleted.WriteTo(&roaringBuf)
if err != nil {
return nil, nil, fmt.Errorf("error persisting roaring bytes: %v", err)
}
err = snapshotSegmentBucket.Put(util.BoltDeletedKey, roaringBuf.Bytes())
if err != nil {
return nil, nil, err
}
}
// store segment stats
if segmentSnapshot.stats != nil {
b, err := json.Marshal(segmentSnapshot.stats.Fetch())
if err != nil {
return nil, nil, err
}
err = snapshotSegmentBucket.Put(util.BoltStatsKey, b)
if err != nil {
return nil, nil, err
}
}
// store updated field info
if segmentSnapshot.updatedFields != nil {
b, err := json.Marshal(segmentSnapshot.updatedFields)
if err != nil {
return nil, nil, err
}
err = snapshotSegmentBucket.Put(util.BoltUpdatedFieldsKey, b)
if err != nil {
return nil, nil, err
}
}
}
return filenames, newSegmentPaths, nil
}
// persistSnapshotDirect persists the snapshot in a single write transaction
// on the root bolt: prepareBoltSnapshot writes the snapshot's buckets and
// persists its in-memory segments (except those listed in exclude). When at
// least one segment was newly persisted, the new segment files are opened
// and handed over on s.persists, and the function waits for the hand-over
// to be applied before committing. After the commit and a sync of the root
// bolt, the recorded file names are dropped from s.ineligibleForRemoval.
//
// It returns segment.ErrClosed when the index is closed while waiting on the
// hand-over. The transaction is rolled back whenever a non-nil error is
// returned.
func (s *Scorch) persistSnapshotDirect(snapshot *IndexSnapshot, exclude map[uint64]struct{}) (err error) {
// start a write transaction
tx, err := s.rootBolt.Begin(true)
if err != nil {
return err
}
// defer rollback on error
// (err is the named result, so this observes the value being returned)
defer func() {
if err != nil {
_ = tx.Rollback()
}
}()
filenames, newSegmentPaths, err := prepareBoltSnapshot(snapshot, tx, s.path, s.segPlugin, exclude, nil)
if err != nil {
return err
}
// we need to swap in a new root only when we've persisted 1 or
// more segments -- whereby the new root would have 1-for-1
// replacements of in-memory segments with file-based segments
//
// other cases like updates to internal values only, and/or when
// there are only deletions, are already covered and persisted by
// the newly populated boltdb snapshotBucket above
if len(newSegmentPaths) > 0 {
// now try to open all the new snapshots
newSegments := make(map[uint64]segment.Segment, len(newSegmentPaths))
defer func() {
// note: the loop variable s shadows the receiver inside this closure
for _, s := range newSegments {
if s != nil {
// cleanup segments that were opened but not
// swapped into the new root
_ = s.Close()
}
}
}()
for segmentID, path := range newSegmentPaths {
newSegments[segmentID], err = s.segPlugin.OpenUsing(path, s.segmentConfig)
if err != nil {
return fmt.Errorf("error opening new segment at %s, %v", path, err)
}
}
persist := &persistIntroduction{
persisted: newSegments,
applied: make(notificationChan),
}
// hand the opened segments over, unless the index is closed first
select {
case <-s.closeCh:
return segment.ErrClosed
case s.persists <- persist:
}
// wait until the hand-over has been applied, unless the index is closed first
select {
case <-s.closeCh:
return segment.ErrClosed
case <-persist.applied:
}
}
err = tx.Commit()
if err != nil {
return err
}
err = s.rootBolt.Sync()
if err != nil {
return err
}
// allow files to become eligible for removal after commit, such
// as file segments from snapshots that came from the merger
s.rootLock.Lock()
for _, filename := range filenames {
delete(s.ineligibleForRemoval, filename)
}
s.rootLock.Unlock()
return nil
}
// zapFileName returns the file name used for the zap segment with the given
// id: the id in lower-case hexadecimal, zero-padded to at least 12 digits,
// followed by the ".zap" extension.
func zapFileName(epoch uint64) string {
	const extension = ".zap"
	hexID := fmt.Sprintf("%012x", epoch)
	return hexID + extension
}
// bolt snapshot code
// loadFromBolt restores the index state from the root bolt. It walks the
// persisted snapshots from the newest epoch to the oldest and installs the
// first one that loads successfully as the root, setting nextSnapshotEpoch
// and nextSegmentID accordingly. Every other snapshot (older ones, and newer
// ones that failed to load) is marked eligible for removal. Finally the
// persisted snapshot meta-data is stored in s.checkPoints.
//
// A snapshot that was loaded but cannot be installed because the maximum
// segment id on disk cannot be determined is released before the error is
// returned, so that the segments it opened are not left open.
func (s *Scorch) loadFromBolt() error {
	err := s.rootBolt.View(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket(util.BoltSnapshotsBucket)
		if snapshots == nil {
			return nil
		}
		foundRoot := false
		c := snapshots.Cursor()
		// newest epoch first
		for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
			_, snapshotEpoch, err := decodeUvarintAscending(k)
			if err != nil {
				log.Printf("unable to parse segment epoch %x, continuing", k)
				continue
			}
			if foundRoot {
				// a newer snapshot is already the root
				s.AddEligibleForRemoval(snapshotEpoch)
				continue
			}
			snapshot := snapshots.Bucket(k)
			if snapshot == nil {
				log.Printf("snapshot key, but bucket missing %x, continuing", k)
				s.AddEligibleForRemoval(snapshotEpoch)
				continue
			}
			indexSnapshot, err := s.loadSnapshot(snapshot)
			if err != nil {
				log.Printf("unable to load snapshot, %v, continuing", err)
				s.AddEligibleForRemoval(snapshotEpoch)
				continue
			}
			indexSnapshot.epoch = snapshotEpoch
			// set the nextSegmentID
			maxSegmentID, err := s.maxSegmentIDOnDisk()
			if err != nil {
				// the snapshot never becomes the root, release what
				// loadSnapshot opened
				_ = indexSnapshot.DecRef()
				return err
			}
			s.nextSegmentID = maxSegmentID + 1
			s.rootLock.Lock()
			s.nextSnapshotEpoch = snapshotEpoch + 1
			rootPrev := s.root
			s.root = indexSnapshot
			s.rootLock.Unlock()

			if rootPrev != nil {
				_ = rootPrev.DecRef()
			}

			foundRoot = true
		}
		return nil
	})
	if err != nil {
		return err
	}

	persistedSnapshots, err := s.rootBoltSnapshotMetaData()
	if err != nil {
		return err
	}
	s.checkPoints = persistedSnapshots
	return nil
}
// LoadSnapshot loads the snapshot that was persisted under the specified
// epoch. An error is returned when no snapshot exists for that epoch or when
// it cannot be loaded. When the root bolt has no snapshots bucket at all,
// both results are nil.
// NOTE: this is currently ONLY intended to be used by the command-line tool
func (s *Scorch) LoadSnapshot(epoch uint64) (rv *IndexSnapshot, err error) {
	lookup := func(tx *bolt.Tx) error {
		allSnapshots := tx.Bucket(util.BoltSnapshotsBucket)
		if allSnapshots == nil {
			return nil
		}
		epochBucket := allSnapshots.Bucket(encodeUvarintAscending(nil, epoch))
		if epochBucket == nil {
			return fmt.Errorf("snapshot with epoch: %v - doesn't exist", epoch)
		}
		var loadErr error
		rv, loadErr = s.loadSnapshot(epochBucket)
		return loadErr
	}

	if err = s.rootBolt.View(lookup); err != nil {
		return nil, err
	}
	return rv, nil
}
// loadSnapshot reconstructs an IndexSnapshot from its bolt bucket. The
// meta-data bucket selects the segment plugin (type and version), the
// internal bucket supplies the internal key/values, and every other
// sub-bucket is loaded as a segment whose id is decoded from the bucket key.
// The returned snapshot holds one reference and has s as its parent.
//
// On any failure the partially built snapshot is released and an error is
// returned. A missing or truncated segment version value is reported as an
// error instead of panicking while decoding it, and a segment that was
// opened but whose id cannot be decoded is closed before returning.
func (s *Scorch) loadSnapshot(snapshot *bolt.Bucket) (*IndexSnapshot, error) {
	rv := &IndexSnapshot{
		parent:   s,
		internal: make(map[string][]byte),
		refs:     1,
		creator:  "loadSnapshot",
	}
	// first we look for the meta-data bucket, this will tell us
	// which segment type/version was used for this snapshot
	// all operations for this scorch will use this type/version
	metaBucket := snapshot.Bucket(util.BoltMetaDataKey)
	if metaBucket == nil {
		_ = rv.DecRef()
		return nil, fmt.Errorf("meta-data bucket missing")
	}
	segmentType := string(metaBucket.Get(util.BoltMetaDataSegmentTypeKey))
	versionBytes := metaBucket.Get(util.BoltMetaDataSegmentVersionKey)
	// binary.BigEndian.Uint32 needs at least 4 bytes
	if len(versionBytes) < 4 {
		_ = rv.DecRef()
		return nil, fmt.Errorf("segment version missing or malformed in meta-data bucket")
	}
	segmentVersion := binary.BigEndian.Uint32(versionBytes)
	err := s.loadSegmentPlugin(segmentType, segmentVersion)
	if err != nil {
		_ = rv.DecRef()
		return nil, fmt.Errorf(
			"unable to load correct segment wrapper: %v", err)
	}
	// running document count, used as the offset of the next segment
	var running uint64
	c := snapshot.Cursor()
	for k, _ := c.First(); k != nil; k, _ = c.Next() {
		if k[0] == util.BoltInternalKey[0] {
			internalBucket := snapshot.Bucket(k)
			if internalBucket == nil {
				_ = rv.DecRef()
				return nil, fmt.Errorf("internal bucket missing")
			}
			err := internalBucket.ForEach(func(key []byte, val []byte) error {
				// copy the value out before storing it
				copiedVal := append([]byte(nil), val...)
				rv.internal[string(key)] = copiedVal
				return nil
			})
			if err != nil {
				_ = rv.DecRef()
				return nil, err
			}
		} else if k[0] != util.BoltMetaDataKey[0] {
			segmentBucket := snapshot.Bucket(k)
			if segmentBucket == nil {
				_ = rv.DecRef()
				return nil, fmt.Errorf("segment key, but bucket missing %x", k)
			}
			segmentSnapshot, err := s.loadSegment(segmentBucket)
			if err != nil {
				_ = rv.DecRef()
				return nil, fmt.Errorf("failed to load segment: %v", err)
			}
			_, segmentSnapshot.id, err = decodeUvarintAscending(k)
			if err != nil {
				// not part of rv yet, so rv.DecRef() would not release it
				_ = segmentSnapshot.segment.Close()
				_ = rv.DecRef()
				return nil, fmt.Errorf("failed to decode segment id: %v", err)
			}
			rv.segment = append(rv.segment, segmentSnapshot)
			rv.offsets = append(rv.offsets, running)
			// Merge all segment level updated field info for use during queries
			if segmentSnapshot.updatedFields != nil {
				rv.MergeUpdateFieldsInfo(segmentSnapshot.updatedFields)
			}
			running += segmentSnapshot.segment.Count()
		}
	}
	return rv, nil
}
// loadSegment opens the segment file referenced by the given segment bucket
// and wraps it in a SegmentSnapshot. The optional deleted bitmap, field
// stats and updated field info stored in the bucket are attached when
// present; an empty deleted bitmap is treated as absent. If any of these
// cannot be decoded, the opened segment is closed and an error is returned.
func (s *Scorch) loadSegment(segmentBucket *bolt.Bucket) (*SegmentSnapshot, error) {
	pathBytes := segmentBucket.Get(util.BoltPathKey)
	if pathBytes == nil {
		return nil, fmt.Errorf("segment path missing")
	}

	seg, err := s.segPlugin.OpenUsing(
		s.path+string(os.PathSeparator)+string(pathBytes), s.segmentConfig)
	if err != nil {
		return nil, fmt.Errorf("error opening bolt segment: %v", err)
	}

	segSnapshot := &SegmentSnapshot{
		segment:    seg,
		cachedDocs: &cachedDocs{cache: nil},
		cachedMeta: &cachedMeta{meta: nil},
	}

	// deleted documents
	if raw := segmentBucket.Get(util.BoltDeletedKey); raw != nil {
		bitmap := roaring.NewBitmap()
		if _, err := bitmap.ReadFrom(bytes.NewReader(raw)); err != nil {
			_ = seg.Close()
			return nil, fmt.Errorf("error reading deleted bytes: %v", err)
		}
		if !bitmap.IsEmpty() {
			segSnapshot.deleted = bitmap
		}
	}

	// field stats
	if raw := segmentBucket.Get(util.BoltStatsKey); raw != nil {
		var statsMap map[string]map[string]uint64
		if err := json.Unmarshal(raw, &statsMap); err != nil {
			_ = seg.Close()
			return nil, fmt.Errorf("error reading stat bytes: %v", err)
		}
		segSnapshot.stats = &fieldStats{statMap: statsMap}
	}

	// updated field info
	if raw := segmentBucket.Get(util.BoltUpdatedFieldsKey); raw != nil {
		var fieldsInfo map[string]*index.UpdateFieldInfo
		if err := json.Unmarshal(raw, &fieldsInfo); err != nil {
			_ = seg.Close()
			return nil, fmt.Errorf("error reading updated field bytes: %v", err)
		}
		segSnapshot.updatedFields = fieldsInfo
		// Set the value within the segment base for use during merge
		segSnapshot.UpdateFieldsInfo(segSnapshot.updatedFields)
	}

	return segSnapshot, nil
}
// removeOldData cleans up persisted state that is no longer needed: first
// old snapshots in the root bolt, then zap files. A failure of either step
// is reported through fireAsyncError and does not prevent the other step
// from running. The number of removed snapshots is added to the
// TotSnapshotsRemovedFromMetaStore stat.
func (s *Scorch) removeOldData() {
	numRemoved, boltErr := s.removeOldBoltSnapshots()
	if boltErr != nil {
		s.fireAsyncError(NewScorchError(
			persister,
			fmt.Sprintf("got err removing old bolt snapshots: %v", boltErr),
			ErrCleanup,
		))
	}
	atomic.AddUint64(&s.stats.TotSnapshotsRemovedFromMetaStore, uint64(numRemoved))

	if zapErr := s.removeOldZapFiles(); zapErr != nil {
		s.fireAsyncError(NewScorchError(
			persister,
			fmt.Sprintf("got err removing old zap files: %v", zapErr),
			ErrCleanup,
		))
	}
}
// NumSnapshotsToKeep represents how many recent, old snapshots to
// keep around per Scorch instance. Useful for apps that require
// rollback'ability.
var NumSnapshotsToKeep = 1
// RollbackSamplingInterval controls how far back we are looking
// in the history to get the rollback points.
// For example, a value of 10 minutes ensures that the
// protected snapshots (NumSnapshotsToKeep = 3) are:
//
// the very latest snapshot (i.e. the current one),
// the snapshot that was persisted 10 minutes before the current one,
// the snapshot that was persisted 20 minutes before the current one
//
// By default however, the timeseries way of protecting snapshots is
// disabled (the interval is 0), and we protect the latest
// NumSnapshotsToKeep contiguous snapshots
var RollbackSamplingInterval = 0 * time.Minute
// RollbackRetentionFactor controls what portion of the earlier rollback
// points to retain during an infrequent/sparse mutation scenario
var RollbackRetentionFactor = float64(0.5)
// getTimeSeriesSnapshots selects, from snapshots, a "time series" of
// snapshots that are separated by roughly interval, starting at the last
// element of the slice and walking towards index 0.
//
// It returns the index of the last snapshot added to the series together
// with a map from the selected epochs to their timestamps. The walk stops
// once the number of selected snapshots reaches maxDataPoints; that limit is
// checked at the end of each step, so one step is always taken when there is
// more than one snapshot.
//
// When interval is 0 (time series disabled) or snapshots is empty, nothing
// is selected and (len(snapshots), empty map) is returned.
func getTimeSeriesSnapshots(maxDataPoints int, interval time.Duration,
	snapshots []*snapshotMetaData,
) (int, map[uint64]time.Time) {
	// nothing to sample; also guards the snapshots[len-1] access below
	if interval == 0 || len(snapshots) == 0 {
		return len(snapshots), map[uint64]time.Time{}
	}
	// the map containing the time series snapshots, i.e the timeseries of snapshots
	// each of which is separated by rollbackSamplingInterval
	rv := make(map[uint64]time.Time)
	// the last point in the "time series", i.e. the timeseries of snapshots
	// each of which is separated by rollbackSamplingInterval
	ptr := len(snapshots) - 1
	rv[snapshots[ptr].epoch] = snapshots[ptr].timeStamp
	numSnapshotsProtected := 1

	// comparisons are done in terms of minutes
	intervalMinutes := interval.Minutes()

	// traverse the list in reverse order, older timestamps to newer ones.
	for i := ptr - 1; i >= 0; i-- {
		elapsed := snapshots[i].timeStamp.Sub(snapshots[ptr].timeStamp).Minutes()
		switch {
		case elapsed > intervalMinutes:
			// snapshot i is already past the next data point, so the
			// snapshot just before it (index i+1) becomes the data point
			if _, ok := rv[snapshots[i+1].epoch]; !ok {
				rv[snapshots[i+1].epoch] = snapshots[i+1].timeStamp
				ptr = i + 1
				numSnapshotsProtected++
			}
		case elapsed == intervalMinutes:
			// snapshot i is exactly one interval away from the last data point
			if _, ok := rv[snapshots[i].epoch]; !ok {
				rv[snapshots[i].epoch] = snapshots[i].timeStamp
				ptr = i
				numSnapshotsProtected++
			}
		}

		if numSnapshotsProtected >= maxDataPoints {
			break
		}
	}
	return ptr, rv
}
// getProtectedSnapshots determines which persisted epochs must be kept,
// returning them mapped to their timestamps. It first asks
// getTimeSeriesSnapshots for up to numSnapshotsToKeep-1 snapshots separated
// by rollbackSamplingInterval, and then, when fewer than numSnapshotsToKeep
// epochs are protected, tops the set up with the leading entries of
// persistedSnapshots.
func getProtectedSnapshots(rollbackSamplingInterval time.Duration,
	numSnapshotsToKeep int,
	persistedSnapshots []*snapshotMetaData,
) map[uint64]time.Time {
	// keep numSnapshotsToKeep - 1 worth of time series snapshots, because we always
	// must preserve the very latest snapshot in bolt as well to avoid accidental
	// deletes of bolt entries and cleanups by the purger code.
	lastPoint, protectedEpochs := getTimeSeriesSnapshots(numSnapshotsToKeep-1,
		rollbackSamplingInterval, persistedSnapshots)

	stillNeeded := numSnapshotsToKeep - len(protectedEpochs)
	if stillNeeded <= 0 {
		return protectedEpochs
	}

	// protect the contiguous snapshots at the head of the list, never
	// reaching the last point of the time series
	for i := 0; i < lastPoint; i++ {
		if i == stillNeeded {
			break
		}
		snap := persistedSnapshots[i]
		protectedEpochs[snap.epoch] = snap.timeStamp
	}
	return protectedEpochs
}
// newCheckPoints converts a map of epoch -> timestamp into a slice of
// snapshotMetaData ordered from the newest timestamp to the oldest. The
// returned slice is never nil.
func newCheckPoints(snapshots map[uint64]time.Time) []*snapshotMetaData {
	epochs := make([]uint64, 0, len(snapshots))
	for epoch := range snapshots {
		epochs = append(epochs, epoch)
	}

	// newest first
	sort.SliceStable(epochs, func(a, b int) bool {
		return snapshots[epochs[a]].After(snapshots[epochs[b]])
	})

	checkPoints := make([]*snapshotMetaData, 0)
	for i := range epochs {
		checkPoints = append(checkPoints, &snapshotMetaData{
			epoch:     epochs[i],
			timeStamp: snapshots[epochs[i]],
		})
	}
	return checkPoints
}
// Removes enough snapshots from the rootBolt so that the
// s.eligibleForRemoval stays under the NumSnapshotsToKeep policy.
//
// Nothing is removed when the number of persisted snapshots does not exceed
// s.numSnapshotsToKeep. Otherwise every epoch in s.eligibleForRemoval that is
// not protected (see getProtectedSnapshots) has its bucket deleted from the
// snapshots bucket in a single write transaction; protected epochs stay in
// s.eligibleForRemoval. s.checkPoints is replaced by the protected
// snapshots. It returns the number of epochs whose delete did not fail.
func (s *Scorch) removeOldBoltSnapshots() (numRemoved int, err error) {
persistedSnapshots, err := s.rootBoltSnapshotMetaData()
if err != nil {
return 0, err
}
if len(persistedSnapshots) <= s.numSnapshotsToKeep {
// we need to keep everything
return 0, nil
}
protectedSnapshots := getProtectedSnapshots(s.rollbackSamplingInterval,
s.numSnapshotsToKeep, persistedSnapshots)
var epochsToRemove []uint64
var newEligible []uint64
// split the eligible epochs into protected ones (kept for later) and the
// ones to remove now
s.rootLock.Lock()
for _, epoch := range s.eligibleForRemoval {
if _, ok := protectedSnapshots[epoch]; ok {
// protected
newEligible = append(newEligible, epoch)
} else {
epochsToRemove = append(epochsToRemove, epoch)
}
}
s.eligibleForRemoval = newEligible
s.rootLock.Unlock()
// NOTE(review): s.checkPoints is assigned after rootLock is released -
// confirm whether it needs to be guarded.
s.checkPoints = newCheckPoints(protectedSnapshots)
if len(epochsToRemove) == 0 {
return 0, nil
}
tx, err := s.rootBolt.Begin(true)
if err != nil {
return 0, err
}
// commit when the named result err is nil at return time, roll back
// otherwise; a successful commit is followed by a sync of the root bolt
defer func() {
if err == nil {
err = tx.Commit()
} else {
_ = tx.Rollback()
}
if err == nil {
err = s.rootBolt.Sync()
}
}()
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return 0, nil
}
// NOTE(review): err is overwritten on every iteration, so only the error of
// the last DeleteBucket call decides between commit and rollback, and
// numRemoved is returned as counted even when the transaction is rolled
// back - confirm this is intended.
for _, epochToRemove := range epochsToRemove {
k := encodeUvarintAscending(nil, epochToRemove)
err = snapshots.DeleteBucket(k)
// a bucket that is already gone counts as removed
if err == bolt.ErrBucketNotFound {
err = nil
}
if err == nil {
numRemoved++
}
}
return numRemoved, err
}
// maxSegmentIDOnDisk scans the index directory for "*.zap" files and returns
// the largest segment id encoded, in hexadecimal, in their file names. It
// returns 0 when there are no zap files, and an error when the directory
// cannot be read or a zap file name is not a valid hexadecimal id.
func (s *Scorch) maxSegmentIDOnDisk() (uint64, error) {
	entries, err := os.ReadDir(s.path)
	if err != nil {
		return 0, err
	}

	var maxID uint64
	for _, entry := range entries {
		name := entry.Name()
		if filepath.Ext(name) != ".zap" {
			continue
		}
		id, parseErr := strconv.ParseUint(strings.TrimSuffix(name, ".zap"), 16, 64)
		if parseErr != nil {
			return 0, parseErr
		}
		if id > maxID {
			maxID = id
		}
	}
	return maxID, nil
}
// removeOldZapFiles removes any *.zap files which aren't listed in the
// rootBolt. A file is left alone when it is marked in
// s.ineligibleForRemoval or has a positive count in s.copyScheduled. A
// failure to remove an individual file is only logged; an error is returned
// solely when the live file names or the directory listing cannot be
// obtained.
func (s *Scorch) removeOldZapFiles() error {
	liveFileNames, err := s.loadZapFileNames()
	if err != nil {
		return err
	}

	entries, err := os.ReadDir(s.path)
	if err != nil {
		return err
	}

	s.rootLock.RLock()
	for _, entry := range entries {
		name := entry.Name()
		if filepath.Ext(name) != ".zap" {
			continue
		}
		if _, live := liveFileNames[name]; live {
			continue
		}
		if s.ineligibleForRemoval[name] || s.copyScheduled[name] > 0 {
			continue
		}
		if rmErr := os.Remove(s.path + string(os.PathSeparator) + name); rmErr != nil {
			log.Printf("got err removing file: %s, err: %v", name, rmErr)
		}
	}
	s.rootLock.RUnlock()

	return nil
}
// In sparse mutation scenario, it can so happen that all protected
// snapshots are older than the numSnapshotsToKeep * rollbackSamplingInterval
// duration. This results in all of them being purged from the boltDB
// and the next iteration of the removeOldData() would end up protecting
// latest contiguous snapshot which is a poor pattern in the rollback checkpoints.
// Hence we try to retain at most retentionFactor portion worth of old snapshots
// in such a scenario using the following function.
//
// getBoundaryCheckPoint picks the check point at index
// floor(len(checkPoints) * retentionFactor) and returns its timestamp when
// it is older than timeStamp; otherwise timeStamp is returned unchanged.
// With no check points, timeStamp is returned. The index is clamped to the
// valid range, so a retentionFactor of 1 or more selects the last check
// point and a negative one selects the first, instead of indexing out of
// range.
func getBoundaryCheckPoint(retentionFactor float64,
	checkPoints []*snapshotMetaData, timeStamp time.Time,
) time.Time {
	if len(checkPoints) == 0 {
		return timeStamp
	}

	idx := int(math.Floor(float64(len(checkPoints)) * retentionFactor))
	if idx < 0 {
		idx = 0
	} else if idx >= len(checkPoints) {
		idx = len(checkPoints) - 1
	}

	boundary := checkPoints[idx]
	if timeStamp.Sub(boundary.timeStamp) > 0 {
		// return the extended boundary which will dictate the older snapshots
		// to be retained
		return boundary.timeStamp
	}
	return timeStamp
}
type snapshotMetaData struct {
epoch uint64
timeStamp time.Time
}
// rootBoltSnapshotMetaData walks the snapshots bucket of the rootBolt from
// the last key to the first and returns the metadata of the snapshots that
// fall inside the retention window.
//
// The window starts out as (numSnapshotsToKeep-1) * rollbackSamplingInterval.
// When that is zero, no timestamps are read: every key that decodes is
// returned with only its epoch set. Otherwise a snapshot is returned (epoch
// and timestamp) when its recorded timestamp is within the window. The first
// snapshot found outside the window extends the window once, using
// getBoundaryCheckPoint with the previously stored s.checkPoints; that
// snapshot itself is not added to the result.
//
// Keys that fail to decode, and snapshots whose bucket, metadata bucket or
// timestamp cannot be read, are skipped without reporting an error. The
// returned error is whatever rootBolt.View returns.
func (s *Scorch) rootBoltSnapshotMetaData() ([]*snapshotMetaData, error) {
var rv []*snapshotMetaData
currTime := time.Now()
// including the very latest snapshot there should be n snapshots, so the
// very last one would be tc - (n-1) * d
// for eg for n = 3 the checkpoints preserved should be tc, tc - d, tc - 2d
expirationDuration := time.Duration(s.numSnapshotsToKeep-1) * s.rollbackSamplingInterval
err := s.rootBolt.View(func(tx *bolt.Tx) error {
snapshots := tx.Bucket(util.BoltSnapshotsBucket)
if snapshots == nil {
return nil
}
sc := snapshots.Cursor()
// found records that the retention window has already been extended once
var found bool
// traversal order - latest -> oldest epoch
for sk, _ := sc.Last(); sk != nil; sk, _ = sc.Prev() {
_, snapshotEpoch, err := decodeUvarintAscending(sk)
if err != nil {
continue
}
// a zero window means timestamps are irrelevant; report the epoch only
if expirationDuration == 0 {
rv = append(rv, &snapshotMetaData{
epoch: snapshotEpoch,
})
continue
}
snapshot := snapshots.Bucket(sk)
if snapshot == nil {
continue
}
metaBucket := snapshot.Bucket(util.BoltMetaDataKey)
if metaBucket == nil {
continue
}
timeStampBytes := metaBucket.Get(util.BoltMetaDataTimeStamp)
var timeStamp time.Time
err = timeStamp.UnmarshalText(timeStampBytes)
if err != nil {
continue
}
// Don't keep snapshots older than the expiration duration, which is
// initially (numSnapshotsToKeep-1) * rollbackSamplingInterval and may
// be extended once below.
if currTime.Sub(timeStamp) <= expirationDuration {
rv = append(rv, &snapshotMetaData{
epoch: snapshotEpoch,
timeStamp: timeStamp,
})
} else {
if !found {
// first snapshot outside the window: widen the window to the
// boundary checkpoint and move on without recording this snapshot
found = true
boundary := getBoundaryCheckPoint(s.rollbackRetentionFactor,
s.checkPoints, timeStamp)
expirationDuration = currTime.Sub(boundary)
continue
}
// NOTE(review): this delete is issued from inside rootBolt.View.
// Assuming View runs a read-only transaction (as in bbolt), the
// delete cannot take effect here; its error is also discarded, since
// err is local to the loop and the closure always returns nil.
// Confirm whether the deletion is actually intended.
k := encodeUvarintAscending(nil, snapshotEpoch)
err = snapshots.DeleteBucket(k)
if err == bolt.ErrBucketNotFound {
err = nil
}
}
}
return nil
})
return rv, err
}
// RootBoltSnapshotEpochs returns the epochs of the snapshots recorded in the
// rootBolt, visiting the snapshot keys from the last one to the first one.
// Keys that cannot be decoded into an epoch are skipped. The result is nil
// when the snapshots bucket does not exist or holds no decodable keys.
func (s *Scorch) RootBoltSnapshotEpochs() ([]uint64, error) {
	var epochs []uint64

	collect := func(tx *bolt.Tx) error {
		bucket := tx.Bucket(util.BoltSnapshotsBucket)
		if bucket == nil {
			return nil
		}

		cursor := bucket.Cursor()
		key, _ := cursor.Last()
		for key != nil {
			if _, epoch, decodeErr := decodeUvarintAscending(key); decodeErr == nil {
				epochs = append(epochs, epoch)
			}
			key, _ = cursor.Prev()
		}
		return nil
	}

	err := s.rootBolt.View(collect)
	return epochs, err
}
// loadZapFileNames returns, as a set, the segment paths stored under
// util.BoltPathKey for every segment of every snapshot listed in the
// rootBolt. Entries whose key starts with the internal-key marker, entries
// that are not buckets, and segment buckets without a path are ignored.
func (s *Scorch) loadZapFileNames() (map[string]struct{}, error) {
	names := make(map[string]struct{})

	err := s.rootBolt.View(func(tx *bolt.Tx) error {
		root := tx.Bucket(util.BoltSnapshotsBucket)
		if root == nil {
			return nil
		}

		snapCursor := root.Cursor()
		for snapKey, _ := snapCursor.First(); snapKey != nil; snapKey, _ = snapCursor.Next() {
			snap := root.Bucket(snapKey)
			if snap == nil {
				continue
			}

			segCursor := snap.Cursor()
			for segKey, _ := segCursor.First(); segKey != nil; segKey, _ = segCursor.Next() {
				// internal data lives next to the segments; it has no path
				if segKey[0] == util.BoltInternalKey[0] {
					continue
				}
				seg := snap.Bucket(segKey)
				if seg == nil {
					continue
				}
				if path := seg.Get(util.BoltPathKey); path != nil {
					names[string(path)] = struct{}{}
				}
			}
		}
		return nil
	})

	return names, err
}
================================================
FILE: index/scorch/reader_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"context"
"encoding/binary"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// TestIndexReader indexes two documents and then exercises the term field
// readers handed out by the index reader: counting and iterating postings,
// resolving internal IDs, checking term vectors, advancing to specific
// documents, and reading a term from a field that does not exist.
//
// Failures to obtain a reader abort the test immediately, because every
// following step would otherwise dereference a nil reader. Each term field
// reader is closed once the test is done with it.
func TestIndexReader(t *testing.T) {
	cfg := CreateConfig("TestIndexReader")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// first look for a term that doesn't exist
	reader, err := indexReader.TermFieldReader(context.TODO(), []byte("nope"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count := reader.Count()
	if count != 0 {
		t.Errorf("Expected doc count to be: %d got: %d", 0, count)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now a term that both documents contain
	reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count = reader.Count()
	if count != expectedCount {
		t.Errorf("Expected doc count to be: %d got: %d", expectedCount, count)
	}
	var match *index.TermFieldDoc
	var actualCount uint64
	for {
		match, err = reader.Next(nil)
		if err != nil {
			t.Fatalf("unexpected error reading next: %v", err)
		}
		if match == nil {
			break
		}
		actualCount++
	}
	if actualCount != count {
		t.Errorf("count was %d, but only saw %d", count, actualCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	internalIDBogus, err := indexReader.InternalID("a-bogus-docId")
	if err != nil {
		t.Fatal(err)
	}
	if internalIDBogus != nil {
		t.Errorf("expected bogus docId to have nil InternalID")
	}

	internalID2, err := indexReader.InternalID("2")
	if err != nil {
		t.Fatal(err)
	}

	// the term vectors of "rice" in doc 2 must be reported
	expectedMatch := &index.TermFieldDoc{
		ID:   internalID2,
		Freq: 1,
		Norm: 0.5773502588272095,
		Vectors: []*index.TermFieldVector{
			{
				Field: "desc",
				Pos:   3,
				Start: 9,
				End:   13,
			},
		},
	}
	tfr, err := indexReader.TermFieldReader(context.TODO(), []byte("rice"), "desc", true, true, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	match, err = tfr.Next(nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if !reflect.DeepEqual(expectedMatch, match) {
		t.Errorf("got %#v, expected %#v", match, expectedMatch)
	}
	err = tfr.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test usage of advance
	reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}

	match, err = reader.Advance(internalID2, nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match == nil {
		t.Fatalf("Expected match, got nil")
	}
	if !match.ID.Equals(internalID2) {
		t.Errorf("Expected ID '2', got '%s'", match.ID)
	}

	// have to manually construct bogus id, because it doesn't exist
	internalID3 := make([]byte, 8)
	binary.BigEndian.PutUint64(internalID3, 3)
	match, err = reader.Advance(index.IndexInternalID(internalID3), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test creating a reader for a field that doesn't exist
	reader, err = indexReader.TermFieldReader(context.TODO(), []byte("water"), "doesnotexist", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count = reader.Count()
	if count != 0 {
		t.Errorf("expected count 0 for reader of non-existent field")
	}
	match, err = reader.Next(nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	match, err = reader.Advance(index.IndexInternalID("anywhere"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexDocIdReader indexes two documents and checks the "all documents"
// doc ID reader: a full iteration must visit every document, Advance to an
// existing internal ID must land on it, and Advance to an ID that does not
// exist must return nil.
//
// Failures to obtain a reader abort the test immediately, since the
// following steps (and the deferred Close calls) would otherwise
// dereference a nil reader.
func TestIndexDocIdReader(t *testing.T) {
	cfg := CreateConfig("TestIndexDocIdReader")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
	doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// first get all doc ids
	reader, err := indexReader.DocIDReaderAll()
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	id, err := reader.Next()
	if err != nil {
		t.Fatal(err)
	}
	count := uint64(0)
	for id != nil {
		count++
		id, err = reader.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != expectedCount {
		t.Errorf("expected %d, got %d", expectedCount, count)
	}

	// try it again, but jump to the second doc this time
	reader2, err := indexReader.DocIDReaderAll()
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader2.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	internalID2, err := indexReader.InternalID("2")
	if err != nil {
		t.Fatal(err)
	}

	id, err = reader2.Advance(internalID2)
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(internalID2) {
		t.Errorf("expected to find id '2', got '%s'", id)
	}

	// again 3 doesn't exist cannot use internal id for 3 as there is none
	// the important aspect is that this id doesn't exist, so its ok
	id, err = reader2.Advance(index.IndexInternalID("3"))
	if err != nil {
		t.Error(err)
	}
	if id != nil {
		t.Errorf("expected to find id '', got '%s'", id)
	}
}
// TestIndexDocIdOnlyReader indexes the documents "1", "3", "5", "7" and "9"
// and checks the doc ID reader restricted to a given set of external IDs:
// only the requested IDs that actually exist in the index may be returned.
//
// Failures to obtain a reader abort the test immediately, since the
// following steps (and the deferred Close calls) would otherwise
// dereference a nil reader.
func TestIndexDocIdOnlyReader(t *testing.T) {
	cfg := CreateConfig("TestIndexDocIdOnlyReader")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// each document goes in through its own Update call
	for _, docID := range []string{"1", "3", "5", "7", "9"} {
		err = idx.Update(document.NewDocument(docID))
		if err != nil {
			t.Errorf("Error updating index: %v", err)
		}
	}

	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	onlyIds := []string{"1", "5", "9"}
	reader, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	id, err := reader.Next()
	if err != nil {
		t.Fatal(err)
	}
	count := uint64(0)
	for id != nil {
		count++
		id, err = reader.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != 3 {
		t.Errorf("expected 3, got %d", count)
	}

	// commented out because advance works with internal ids
	// this test presumes we see items in external doc id order
	// which is no longer the case, so simply converting external ids
	// to internal ones is not logically correct
	// not removing though because we need some way to test Advance()
	// // try it again, but jump
	// reader2, err := indexReader.DocIDReaderOnly(onlyIds)
	// if err != nil {
	// t.Errorf("Error accessing doc id reader: %v", err)
	// }
	// defer func() {
	// err := reader2.Close()
	// if err != nil {
	// t.Error(err)
	// }
	// }()
	//
	// id, err = reader2.Advance(index.IndexInternalID("5"))
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("5")) {
	// t.Errorf("expected to find id '5', got '%s'", id)
	// }
	//
	// id, err = reader2.Advance(index.IndexInternalID("a"))
	// if err != nil {
	// t.Error(err)
	// }
	// if id != nil {
	// t.Errorf("expected to find id '', got '%s'", id)
	// }

	// some keys aren't actually there
	onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"}
	reader3, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Fatalf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader3.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader3.Next()
	if err != nil {
		t.Fatal(err)
	}
	count = uint64(0)
	for id != nil {
		count++
		id, err = reader3.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != 1 {
		t.Errorf("expected 1, got %d", count)
	}

	// commented out because advance works with internal ids
	// this test presumes we see items in external doc id order
	// which is no longer the case, so simply converting external ids
	// to internal ones is not logically correct
	// not removing though because we need some way to test Advance()
	// // mix advance and next
	// onlyIds = []string{"0", "1", "3", "5", "6", "9"}
	// reader4, err := indexReader.DocIDReaderOnly(onlyIds)
	// if err != nil {
	// t.Errorf("Error accessing doc id reader: %v", err)
	// }
	// defer func() {
	// err := reader4.Close()
	// if err != nil {
	// t.Error(err)
	// }
	// }()
	//
	// // first key is "1"
	// id, err = reader4.Next()
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("1")) {
	// t.Errorf("expected to find id '1', got '%s'", id)
	// }
	//
	// // advancing to key we dont have gives next
	// id, err = reader4.Advance(index.IndexInternalID("2"))
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("3")) {
	// t.Errorf("expected to find id '3', got '%s'", id)
	// }
	//
	// // next after advance works
	// id, err = reader4.Next()
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("5")) {
	// t.Errorf("expected to find id '5', got '%s'", id)
	// }
	//
	// // advancing to key we do have works
	// id, err = reader4.Advance(index.IndexInternalID("9"))
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("9")) {
	// t.Errorf("expected to find id '9', got '%s'", id)
	// }
	//
	// // advance backwards at end
	// id, err = reader4.Advance(index.IndexInternalID("4"))
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("5")) {
	// t.Errorf("expected to find id '5', got '%s'", id)
	// }
	//
	// // next after advance works
	// id, err = reader4.Next()
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("9")) {
	// t.Errorf("expected to find id '9', got '%s'", id)
	// }
	//
	// // advance backwards to key that exists, but not in only set
	// id, err = reader4.Advance(index.IndexInternalID("7"))
	// if err != nil {
	// t.Error(err)
	// }
	// if !id.Equals(index.IndexInternalID("9")) {
	// t.Errorf("expected to find id '9', got '%s'", id)
	// }
}
// TestSegmentIndexAndLocalDocNumFromGlobal checks the translation of a global
// document number into a (segment index, local document number) pair for
// snapshots made of one, two and many segments.
func TestSegmentIndexAndLocalDocNumFromGlobal(t *testing.T) {
	type testCase struct {
		offsets      []uint64
		globalDocNum uint64
		segmentIndex int
		localDocNum  uint64
	}

	// each case gets its own freshly allocated offsets slice
	single := func() []uint64 { return []uint64{0} }
	pair := func() []uint64 { return []uint64{0, 30} }
	many := func() []uint64 { return []uint64{0, 30, 40, 70, 99, 172, 800, 25000} }

	cases := []testCase{
		// just 1 segment
		{single(), 0, 0, 0},
		{single(), 1, 0, 1},
		{single(), 25, 0, 25},
		// now 2 segments, 30 docs in first
		{pair(), 0, 0, 0},
		{pair(), 1, 0, 1},
		{pair(), 25, 0, 25},
		{pair(), 30, 1, 0},
		{pair(), 35, 1, 5},
		// lots of segments
		{many(), 0, 0, 0},
		{many(), 25, 0, 25},
		{many(), 35, 1, 5},
		{many(), 100, 4, 1},
		{many(), 825, 6, 25},
	}

	for _, tc := range cases {
		snapshot := &IndexSnapshot{
			offsets: tc.offsets,
			refs:    1,
		}

		segIdx, localNum := snapshot.segmentIndexAndLocalDocNumFromGlobal(tc.globalDocNum)
		if segIdx != tc.segmentIndex {
			t.Errorf("got segment index %d expected %d for offsets %v globalDocNum %d", segIdx, tc.segmentIndex, tc.offsets, tc.globalDocNum)
		}
		if localNum != tc.localDocNum {
			t.Errorf("got localDocNum %d expected %d for offsets %v globalDocNum %d", localNum, tc.localDocNum, tc.offsets, tc.globalDocNum)
		}

		if decErr := snapshot.DecRef(); decErr != nil {
			t.Errorf("expected no err, got: %v", decErr)
		}
	}
}
================================================
FILE: index/scorch/regexp.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"regexp/syntax"
"github.com/blevesearch/vellum/regexp"
)
// parseRegexp parses pattern with Perl syntax and builds the regexp
// automaton for it, bounded by regexp.DefaultLimit.
//
// When the pattern starts with a literal prefix, prefixBeg holds that prefix
// and prefixEnd the value computed from it by
// calculateExclusiveEndFromPrefix; both are nil otherwise. On a parse or
// build failure only the error is returned.
func parseRegexp(pattern string) (a *regexp.Regexp, prefixBeg, prefixEnd []byte, err error) {
	// TODO: potential optimization where syntax.Regexp supports a Simplify() API?
	tree, parseErr := syntax.Parse(pattern, syntax.Perl)
	if parseErr != nil {
		return nil, nil, nil, parseErr
	}

	automaton, buildErr := regexp.NewParsedWithLimit(pattern, tree, regexp.DefaultLimit)
	if buildErr != nil {
		return nil, nil, nil, buildErr
	}

	literal := literalPrefix(tree)
	if literal == "" {
		return automaton, nil, nil, nil
	}

	begin := []byte(literal)
	end := calculateExclusiveEndFromPrefix(begin)
	return automaton, begin, end, nil
}
// literalPrefix returns the literal prefix given the parse tree for a
// regexp, or "" when there is none.
//
// It follows the left-most branch of the tree for as long as the node is a
// concatenation. The node reached is the prefix only when it is a literal
// that is not case-folded. A nil tree has no literal prefix.
func literalPrefix(s *syntax.Regexp) string {
	// traverse the left-most branch in the parse tree as long as the
	// node represents a concatenation
	for s != nil && s.Op == syntax.OpConcat {
		if len(s.Sub) < 1 {
			return ""
		}
		s = s.Sub[0]
	}

	// the loop also stops on a nil node, which must not be dereferenced
	if s == nil {
		return ""
	}

	if s.Op == syntax.OpLiteral && (s.Flags&syntax.FoldCase == 0) {
		return string(s.Rune)
	}

	return "" // no literal prefix
}
================================================
FILE: index/scorch/regexp_test.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"regexp/syntax"
"testing"
)
// TestLiteralPrefix checks the literal prefix extracted from a range of
// patterns: plain literals, literals followed by operators, single-rune
// character classes, groups, anchors, multi-byte runes and case-insensitive
// patterns.
func TestLiteralPrefix(t *testing.T) {
	tests := []struct {
		input, expected string
	}{
		{"", ""},
		{"hello", "hello"},
		{"hello.?", "hello"},
		{"hello$", "hello"},
		{`[h][e][l][l][o].*world`, "hello"},
		{`[h-h][e-e][l-l][l-l][o-o].*world`, "hello"},
		{".*", ""},
		{"h.*", "h"},
		{"h.?", "h"},
		{"h[a-z]", "h"},
		{`h\s`, "h"},
		{`(hello)world`, ""},
		{`日本語`, "日本語"},
		{`日本語\w`, "日本語"},
		{`^hello`, ""},
		{`^`, ""},
		{`$`, ""},
		{`(?i)mArTy`, ""},
	}

	for i := range tests {
		test := tests[i]

		parsed, parseErr := syntax.Parse(test.input, syntax.Perl)
		if parseErr != nil {
			t.Fatalf("expected no syntax.Parse error, got: %v", parseErr)
		}

		if got := literalPrefix(parsed); test.expected != got {
			t.Fatalf("test: %d, %+v, got: %s", i, test, got)
		}
	}
}
================================================
FILE: index/scorch/rollback.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"log"
"os"
"github.com/blevesearch/bleve/v2/util"
bolt "go.etcd.io/bbolt"
)
// RollbackPoint identifies a persisted snapshot by its epoch and carries the
// internal key/value pairs that were stored with that snapshot.
type RollbackPoint struct {
	epoch uint64
	meta  map[string][]byte
}

// GetInternal returns the internal value stored under key for this rollback
// point, or nil when no such key was recorded.
func (r *RollbackPoint) GetInternal(key []byte) []byte {
	value, ok := r.meta[string(key)]
	if !ok {
		return nil
	}
	return value
}
// RollbackPoints returns an array of rollback points available for
// the application to rollback to, with more recent rollback points
// (higher epochs) coming first.
//
// path is the index directory; its root.bolt file is opened read-only and is
// closed again before returning, on every path. Snapshots whose epoch cannot
// be decoded, whose bucket is missing, or whose internal data cannot be read
// are logged and left out of the result. A nil slice and nil error are
// returned when the store has no snapshots bucket.
func RollbackPoints(path string) ([]*RollbackPoint, error) {
	if len(path) == 0 {
		return nil, fmt.Errorf("RollbackPoints: invalid path")
	}

	rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
	rootBoltOpt := &bolt.Options{
		ReadOnly: true,
	}
	rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
	if err != nil || rootBolt == nil {
		return nil, err
	}
	// registered before the transaction starts so the handle is released
	// even when Begin fails
	defer func() {
		_ = rootBolt.Close()
	}()

	// start a read-only bolt transaction
	tx, err := rootBolt.Begin(false)
	if err != nil {
		return nil, fmt.Errorf("RollbackPoints: failed to start"+
			" read-only transaction: %w", err)
	}
	// read-only bolt transactions are to be rolled back; this runs before
	// the store is closed
	defer func() {
		_ = tx.Rollback()
	}()

	snapshots := tx.Bucket(util.BoltSnapshotsBucket)
	if snapshots == nil {
		return nil, nil
	}

	rollbackPoints := []*RollbackPoint{}

	c1 := snapshots.Cursor()
	for k, _ := c1.Last(); k != nil; k, _ = c1.Prev() {
		_, snapshotEpoch, err := decodeUvarintAscending(k)
		if err != nil {
			log.Printf("RollbackPoints:"+
				" unable to parse segment epoch %x, continuing", k)
			continue
		}

		snapshot := snapshots.Bucket(k)
		if snapshot == nil {
			log.Printf("RollbackPoints:"+
				" snapshot key, but bucket missing %x, continuing", k)
			continue
		}

		// gather the internal key/value pairs stored with this snapshot
		meta := map[string][]byte{}
		c2 := snapshot.Cursor()
		for j, _ := c2.First(); j != nil; j, _ = c2.Next() {
			if j[0] != util.BoltInternalKey[0] {
				continue
			}
			internalBucket := snapshot.Bucket(j)
			if internalBucket == nil {
				err = fmt.Errorf("internal bucket missing")
				break
			}
			err = internalBucket.ForEach(func(key []byte, val []byte) error {
				// values are copied because they are returned to the
				// caller after the transaction has ended
				copiedVal := append([]byte(nil), val...)
				meta[string(key)] = copiedVal
				return nil
			})
			if err != nil {
				break
			}
		}

		if err != nil {
			log.Printf("RollbackPoints:"+
				" failed in fetching internal data: %v", err)
			continue
		}

		rollbackPoints = append(rollbackPoints, &RollbackPoint{
			epoch: snapshotEpoch,
			meta:  meta,
		})
	}

	return rollbackPoints, nil
}
// Rollback atomically and durably brings the store back to the point
// in time as represented by the RollbackPoint.
// Rollback() should only be passed a RollbackPoint that came from the
// same store using the RollbackPoints() API along with the index path.
//
// Every persisted snapshot with an epoch newer than the target is deleted
// from root.bolt inside a single write transaction; the target snapshot
// itself is kept. An error is returned when to is nil, path is empty, the
// store cannot be opened or read, the target epoch is not present, or when
// deleting, committing, syncing or closing the store fails.
//
// The result is a named value on purpose: the deferred commit/sync/close
// steps report their failures through it, which would be silently lost with
// an unnamed result.
func Rollback(path string, to *RollbackPoint) (err error) {
	if to == nil {
		return fmt.Errorf("Rollback: RollbackPoint is nil")
	}
	if len(path) == 0 {
		return fmt.Errorf("Rollback: index path is empty")
	}

	rootBoltPath := path + string(os.PathSeparator) + "root.bolt"
	rootBoltOpt := &bolt.Options{
		ReadOnly: false,
	}
	rootBolt, err := bolt.Open(rootBoltPath, 0600, rootBoltOpt)
	if err != nil || rootBolt == nil {
		return err
	}
	defer func() {
		err1 := rootBolt.Close()
		if err1 != nil && err == nil {
			err = err1
		}
	}()

	// pick all the younger persisted epochs in bolt store
	// including the target one.
	var found bool
	var eligibleEpochs []uint64
	err = rootBolt.View(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket(util.BoltSnapshotsBucket)
		if snapshots == nil {
			return nil
		}
		sc := snapshots.Cursor()
		for sk, _ := sc.Last(); sk != nil && !found; sk, _ = sc.Prev() {
			_, snapshotEpoch, decodeErr := decodeUvarintAscending(sk)
			if decodeErr != nil {
				continue
			}
			if snapshotEpoch == to.epoch {
				found = true
			}
			eligibleEpochs = append(eligibleEpochs, snapshotEpoch)
		}
		return nil
	})
	if err != nil {
		return err
	}

	if len(eligibleEpochs) == 0 {
		return fmt.Errorf("Rollback: no persisted epochs found in bolt")
	}
	if !found {
		return fmt.Errorf("Rollback: target epoch %d not found in bolt", to.epoch)
	}

	// start a write transaction
	tx, err := rootBolt.Begin(true)
	if err != nil {
		return err
	}
	// commit on success, roll back on failure, then sync the store; any
	// failure here becomes the function's result
	defer func() {
		if err == nil {
			err = tx.Commit()
		} else {
			_ = tx.Rollback()
		}
		if err == nil {
			err = rootBolt.Sync()
		}
	}()

	snapshots := tx.Bucket(util.BoltSnapshotsBucket)
	if snapshots == nil {
		return nil
	}

	// eligibleEpochs is ordered newest first and ends with the target epoch
	for _, epoch := range eligibleEpochs {
		if epoch == to.epoch {
			// return here as it already processed until the given epoch
			return nil
		}
		err = snapshots.DeleteBucket(encodeUvarintAscending(nil, epoch))
		if err == bolt.ErrBucketNotFound {
			err = nil
		}
		if err != nil {
			// abandon the rollback; the deferred handler discards the
			// transaction
			return err
		}
	}

	return nil
}
================================================
FILE: index/scorch/rollback_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"io"
"os"
"path/filepath"
"testing"
"time"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// TestIndexRollback verifies the offline rollback API end to end:
//
//  1. before the index is ever opened, RollbackPoints must fail;
//  2. after a first batch (docs 1 and 2) a rollback point is captured;
//  3. after a second batch (adds 3 and 4, deletes 1) the captured point must
//     still be listed and the index must hold docs 2, 3 and 4;
//  4. rolling back to an unknown epoch must fail;
//  5. rolling back to the captured point must restore docs 1 and 2 only.
func TestIndexRollback(t *testing.T) {
	cfg := CreateConfig("TestIndexRollback")

	// keep plenty of snapshots around so the rollback point survives; the
	// global is restored even when the setup below fails
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	NumSnapshotsToKeep = 1000
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
	}()

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}

	_, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}

	indexPath, _ := cfg["path"].(string)

	mustOpen := func() {
		if err := idx.Open(); err != nil {
			t.Fatal(err)
		}
	}
	mustClose := func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}
	// verifyContents checks the document count and, for docs "1" to "4",
	// whether each one is present exactly when present[id] is true
	verifyContents := func(wantCount uint64, present map[string]bool) {
		reader, err := idx.Reader()
		if err != nil {
			t.Fatal(err)
		}
		docCount, err := reader.DocCount()
		if err != nil {
			t.Fatal(err)
		}
		if docCount != wantCount {
			t.Fatalf("unexpected doc count: %v", docCount)
		}
		for _, id := range []string{"1", "2", "3", "4"} {
			ret, err := reader.Document(id)
			if err != nil || (ret != nil) != present[id] {
				t.Fatal(ret, err)
			}
		}
		err = reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}

	// the index has never been opened, so listing rollback points must fail
	// and yield no rollback points
	rollbackPoints, err := RollbackPoints(indexPath)
	if err == nil {
		t.Fatalf("expected an err before the index is opened, got: %v, %d", err, len(rollbackPoints))
	}
	if len(rollbackPoints) != 0 {
		t.Fatalf("expected no rollbackPoints, got %d", len(rollbackPoints))
	}

	mustOpen()

	// create a batch, insert 2 new documents
	batch := index.NewBatch()
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test1")))
	batch.Update(doc)
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
	batch.Update(doc)

	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	readerSlow, err := idx.Reader() // keep snapshot around so it's not cleaned up
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = readerSlow.Close()
	}()

	mustClose()

	// fetch rollback points after first batch
	rollbackPoints, err = RollbackPoints(indexPath)
	if err != nil {
		t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPoints))
	}
	if len(rollbackPoints) == 0 {
		t.Fatalf("expected some rollbackPoints, got none")
	}

	// set this as a rollback point for the future
	rollbackPoint := rollbackPoints[0]

	mustOpen()

	// create another batch, insert 2 new documents, and delete an existing one
	batch = index.NewBatch()
	doc = document.NewDocument("3")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
	batch.Update(doc)
	doc = document.NewDocument("4")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test4")))
	batch.Update(doc)
	batch.Delete("1")

	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	mustClose()

	rollbackPointsB, err := RollbackPoints(indexPath)
	if err != nil || len(rollbackPointsB) <= len(rollbackPoints) {
		t.Fatalf("expected no err, got: %v, %d", err, len(rollbackPointsB))
	}

	found := false
	for _, p := range rollbackPointsB {
		if rollbackPoint.epoch == p.epoch {
			found = true
		}
	}
	if !found {
		t.Fatalf("expected rollbackPoint epoch to still be available")
	}

	mustOpen()

	// expect docs 2, 3, 4
	verifyContents(3, map[string]bool{"2": true, "3": true, "4": true})

	mustClose()

	// rollback to a non existing rollback point
	err = Rollback(indexPath, &RollbackPoint{epoch: 100})
	if err == nil {
		t.Fatalf("expected err: Rollback: target epoch 100 not found in bolt")
	}

	// rollback to the selected rollback point
	err = Rollback(indexPath, rollbackPoint)
	if err != nil {
		t.Fatal(err)
	}

	mustOpen()

	// expect only docs 1, 2
	verifyContents(2, map[string]bool{"1": true, "2": true})

	mustClose()
}
// TestGetProtectedSnapshots feeds getProtectedSnapshots hand-built snapshot
// metadata and verifies, for every scenario, the number of epochs returned
// and that each expected epoch is present in the result.
func TestGetProtectedSnapshots(t *testing.T) {
	savedInterval := RollbackSamplingInterval
	defer func() {
		RollbackSamplingInterval = savedInterval
	}()
	RollbackSamplingInterval = 10 * time.Minute

	now := time.Now()
	// ago yields the timestamp lying d before now.
	ago := func(d time.Duration) time.Time { return now.Add(-d) }

	cases := []struct {
		title              string
		metaData           []*snapshotMetaData
		numSnapshotsToKeep int
		expCount           int
		expEpochs          []uint64
	}{
		{
			title: "epochs that have exact timestamps as per expectation for protecting",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: now},
				{epoch: 99, timeStamp: ago(RollbackSamplingInterval / 12)},
				{epoch: 88, timeStamp: ago(RollbackSamplingInterval / 6)},
				{epoch: 50, timeStamp: ago(RollbackSamplingInterval)},
				{epoch: 35, timeStamp: ago(6 * RollbackSamplingInterval / 5)},
				{epoch: 10, timeStamp: ago(2 * RollbackSamplingInterval)},
			},
			numSnapshotsToKeep: 3,
			expCount:           3,
			expEpochs:          []uint64{100, 50, 10},
		},
		{
			title: "epochs that have exact timestamps as per expectation for protecting",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: now},
				{epoch: 99, timeStamp: ago(RollbackSamplingInterval / 12)},
				{epoch: 88, timeStamp: ago(RollbackSamplingInterval / 6)},
				{epoch: 50, timeStamp: ago(RollbackSamplingInterval)},
			},
			numSnapshotsToKeep: 2,
			expCount:           2,
			expEpochs:          []uint64{100, 50},
		},
		{
			title: "epochs that have timestamps approximated to the expected value, " +
				"always retain the latest one",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: now},
				{epoch: 99, timeStamp: ago(RollbackSamplingInterval / 12)},
				{epoch: 88, timeStamp: ago(RollbackSamplingInterval / 6)},
				{epoch: 50, timeStamp: ago(3 * RollbackSamplingInterval / 4)},
				{epoch: 35, timeStamp: ago(6 * RollbackSamplingInterval / 5)},
				{epoch: 10, timeStamp: ago(2 * RollbackSamplingInterval)},
			},
			numSnapshotsToKeep: 3,
			expCount:           3,
			expEpochs:          []uint64{100, 35, 10},
		},
		{
			title: "protecting epochs when we don't have enough snapshots with RollbackSamplingInterval" +
				" separated timestamps",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: now},
				{epoch: 99, timeStamp: ago(RollbackSamplingInterval / 12)},
				{epoch: 88, timeStamp: ago(RollbackSamplingInterval / 6)},
				{epoch: 50, timeStamp: ago(3 * RollbackSamplingInterval / 4)},
				{epoch: 35, timeStamp: ago(5 * RollbackSamplingInterval / 6)},
				{epoch: 10, timeStamp: ago(7 * RollbackSamplingInterval / 8)},
			},
			numSnapshotsToKeep: 4,
			expCount:           4,
			expEpochs:          []uint64{100, 99, 88, 10},
		},
		{
			title: "epochs of which some are approximated to the expected timestamps, and" +
				" we don't have enough snapshots with RollbackSamplingInterval separated timestamps",
			metaData: []*snapshotMetaData{
				{epoch: 100, timeStamp: now},
				{epoch: 99, timeStamp: ago(RollbackSamplingInterval / 12)},
				{epoch: 88, timeStamp: ago(RollbackSamplingInterval / 6)},
				{epoch: 50, timeStamp: ago(3 * RollbackSamplingInterval / 4)},
				{epoch: 35, timeStamp: ago(8 * RollbackSamplingInterval / 7)},
				{epoch: 10, timeStamp: ago(6 * RollbackSamplingInterval / 5)},
			},
			numSnapshotsToKeep: 3,
			expCount:           3,
			expEpochs:          []uint64{100, 50, 10},
		},
	}

	for idx, tc := range cases {
		got := getProtectedSnapshots(RollbackSamplingInterval,
			tc.numSnapshotsToKeep, tc.metaData)
		if n := len(got); n != tc.expCount {
			t.Errorf("%d test: %s, getProtectedSnapshots expected to return %d "+
				"snapshots, but got: %d", idx, tc.title, tc.expCount, n)
		}
		for _, epoch := range tc.expEpochs {
			if _, present := got[epoch]; present {
				continue
			}
			t.Errorf("%d test: %s, %d epoch expected to be protected, "+
				"but missing from protected list: %v", idx, tc.title, epoch, got)
		}
	}
}
// indexDummyData indexes two small documents through scorchi in one batch.
// The documents are given the IDs i and i+1 (formatted as decimal strings)
// and carry a "name" text field holding "test1" and "test2" respectively.
// A batch error fails the calling test immediately.
func indexDummyData(t *testing.T, scorchi *Scorch, i int) {
	// Mark this function as a helper so that a failure is attributed to the
	// line in the calling test rather than to the t.Fatal below.
	t.Helper()

	// create a batch, insert 2 new documents
	batch := index.NewBatch()
	doc := document.NewDocument(fmt.Sprintf("%d", i))
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test1")))
	batch.Update(doc)
	doc = document.NewDocument(fmt.Sprintf("%d", i+1))
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
	batch.Update(doc)

	if err := scorchi.Batch(batch); err != nil {
		t.Fatal(err)
	}
}
// testFSDirector is a directory path; the writers it hands out are rooted
// at that directory.
type testFSDirector string

// GetWriter opens filePath, taken relative to the director's directory, for
// reading and writing and creates the file with mode 0600 when it does not
// exist. If filePath has a directory component, that directory is created
// first (os.ModePerm, parents included). The file is not truncated.
func (f testFSDirector) GetWriter(filePath string) (io.WriteCloser,
	error) {
	root := string(f)
	subdir, base := filepath.Split(filePath)
	if subdir != "" {
		if err := os.MkdirAll(filepath.Join(root, subdir), os.ModePerm); err != nil {
			return nil, err
		}
	}
	target := filepath.Join(root, subdir, base)
	return os.OpenFile(target, os.O_RDWR|os.O_CREATE, 0600)
}
// TestLatestSnapshotProtected persists five batches at irregular intervals
// (with merger and purger disabled) and verifies that getProtectedSnapshots
// returns NumSnapshotsToKeep entries and that the first snapshot reported by
// rootBoltSnapshotMetaData is among them.
// NOTE(review): the first entry is presumably the most recent snapshot —
// confirm against rootBoltSnapshotMetaData's ordering.
func TestLatestSnapshotProtected(t *testing.T) {
	cfg := CreateConfig("TestLatestSnapshotProtected")

	// Override the package-level knobs and schedule their restoration before
	// anything that can abort the test, so a failing InitTest cannot leak
	// the overrides into other tests.
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	rollbackSamplingIntervalOrig := RollbackSamplingInterval
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
		RollbackSamplingInterval = rollbackSamplingIntervalOrig
	}()
	NumSnapshotsToKeep = 3
	RollbackSamplingInterval = 10 * time.Second

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	// disable merger and purger
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck || e.Kind == EventKindPurgerCheck {
			return false
		}
		return true
	}
	cfg["eventCallbackName"] = "test"

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	scorchi, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}
	err = scorchi.Open()
	if err != nil {
		t.Fatal(err)
	}
	// Close the index when the test ends. Deferred calls run last-in
	// first-out, so this runs before DestroyTest and stops the background
	// loops before the test directory is torn down.
	defer func() {
		if err := scorchi.Close(); err != nil {
			t.Log(err)
		}
	}()

	// replicate the following scenario of persistence of snapshots
	// tc, tc - d/12, tc - d/6, tc - 3d/4, tc - 5d/6, tc - 6d/5
	// approximate timestamps where there's a chance that the latest snapshot
	// might not fit into the time-series
	indexDummyData(t, scorchi, 1)
	persistedSnapshots, err := scorchi.rootBoltSnapshotMetaData()
	if err != nil {
		t.Fatal(err)
	}
	if len(persistedSnapshots) != 1 {
		t.Fatalf("expected 1 persisted snapshot, got %d", len(persistedSnapshots))
	}
	time.Sleep(4 * RollbackSamplingInterval / 5)
	indexDummyData(t, scorchi, 3)
	time.Sleep(9 * RollbackSamplingInterval / 20)
	indexDummyData(t, scorchi, 5)
	time.Sleep(7 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 7)
	time.Sleep(1 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 9)

	persistedSnapshots, err = scorchi.rootBoltSnapshotMetaData()
	if err != nil {
		t.Fatal(err)
	}
	protectedSnapshots := getProtectedSnapshots(RollbackSamplingInterval, NumSnapshotsToKeep, persistedSnapshots)
	// Compare against the configured value so the check and its message
	// cannot drift apart.
	if len(protectedSnapshots) != NumSnapshotsToKeep {
		t.Fatalf("expected %d protected snapshots, got %d", NumSnapshotsToKeep, len(protectedSnapshots))
	}
	if _, ok := protectedSnapshots[persistedSnapshots[0].epoch]; !ok {
		t.Fatalf("expected %d to be protected, but not found", persistedSnapshots[0].epoch)
	}
}
// TestBackupRacingWithPurge persists five batches at irregular intervals
// (with merger and purger event checks disabled), invokes removeOldData
// directly, and then copies the index through a CopyReader into a fresh
// directory. The copy must succeed.
func TestBackupRacingWithPurge(t *testing.T) {
	cfg := CreateConfig("TestBackupRacingWithPurge")

	// Override the package-level knobs and schedule their restoration before
	// anything that can abort the test, so a failing InitTest cannot leak
	// the overrides into other tests.
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	rollbackSamplingIntervalOrig := RollbackSamplingInterval
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
		RollbackSamplingInterval = rollbackSamplingIntervalOrig
	}()
	NumSnapshotsToKeep = 3
	RollbackSamplingInterval = 10 * time.Second

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	// disable merger and purger
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck || e.Kind == EventKindPurgerCheck {
			return false
		}
		return true
	}
	cfg["eventCallbackName"] = "test"

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	// Runs after the copy reader is released and before DestroyTest(cfg);
	// a close failure is logged instead of being dropped.
	defer func() {
		if err := idx.Close(); err != nil {
			t.Log(err)
		}
	}()
	scorchi, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}
	err = scorchi.Open()
	if err != nil {
		t.Fatal(err)
	}

	// replicate the following scenario of persistence of snapshots
	// tc, tc - d/12, tc - d/6, tc - 3d/4, tc - 5d/6, tc - 6d/5
	// approximate timestamps where there's a chance that the latest snapshot
	// might not fit into the time-series
	indexDummyData(t, scorchi, 1)
	time.Sleep(4 * RollbackSamplingInterval / 5)
	indexDummyData(t, scorchi, 3)
	time.Sleep(9 * RollbackSamplingInterval / 20)
	indexDummyData(t, scorchi, 5)
	time.Sleep(7 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 7)
	time.Sleep(1 * RollbackSamplingInterval / 12)
	indexDummyData(t, scorchi, 9)

	// now if the purge code is invoked, there's a possibility of the latest snapshot
	// being removed from bolt and the corresponding file segment getting cleaned up.
	scorchi.removeOldData()

	copyReader := scorchi.CopyReader()
	defer func() { copyReader.CloseCopyReader() }()

	backupidxConfig := CreateConfig("backup-directory")
	err = InitTest(backupidxConfig)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(backupidxConfig)
		if err != nil {
			t.Log(err)
		}
	}()

	// if the latest snapshot was purged, the following will return error
	err = copyReader.CopyTo(testFSDirector(backupidxConfig["path"].(string)))
	if err != nil {
		t.Fatalf("error copying the index: %v", err)
	}
}
// TestSparseMutationCheckpointing persists four batches one sampling
// interval apart, waits NumSnapshotsToKeep intervals, persists a fifth, and
// verifies that getProtectedSnapshots still reports more than one snapshot
// for the persisted metadata.
func TestSparseMutationCheckpointing(t *testing.T) {
	cfg := CreateConfig("TestSparseMutationCheckpointing")

	// Override the package-level knobs and schedule their restoration before
	// anything that can abort the test, so a failing InitTest cannot leak
	// the overrides into other tests.
	numSnapshotsToKeepOrig := NumSnapshotsToKeep
	rollbackSamplingIntervalOrig := RollbackSamplingInterval
	defer func() {
		NumSnapshotsToKeep = numSnapshotsToKeepOrig
		RollbackSamplingInterval = rollbackSamplingIntervalOrig
	}()
	NumSnapshotsToKeep = 3
	RollbackSamplingInterval = 2 * time.Second

	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	// disable the merger only; purger checks are still answered with true
	RegistryEventCallbacks["test"] = func(e Event) bool {
		if e.Kind == EventKindPreMergeCheck {
			return false
		}
		return true
	}
	cfg["eventCallbackName"] = "test"

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	scorchi, ok := idx.(*Scorch)
	if !ok {
		t.Fatalf("Not a scorch index?")
	}
	err = scorchi.Open()
	if err != nil {
		t.Fatal(err)
	}
	// Close the index when the test ends. Deferred calls run last-in
	// first-out, so this runs before DestroyTest and stops the background
	// loops before the test directory is torn down.
	defer func() {
		if err := scorchi.Close(); err != nil {
			t.Log(err)
		}
	}()

	// create 4 snapshots, one every RollbackSamplingInterval (2 seconds)
	indexDummyData(t, scorchi, 1)
	time.Sleep(RollbackSamplingInterval)
	indexDummyData(t, scorchi, 3)
	time.Sleep(RollbackSamplingInterval)
	indexDummyData(t, scorchi, 5)
	time.Sleep(RollbackSamplingInterval)
	indexDummyData(t, scorchi, 7)

	// now another snapshot is persisted outside of the window of checkpointing
	// and we should be able to retain some older checkpoints as well along with
	// the latest one
	time.Sleep(time.Duration(NumSnapshotsToKeep) * RollbackSamplingInterval)
	indexDummyData(t, scorchi, 9)

	persistedSnapshots, err := scorchi.rootBoltSnapshotMetaData()
	if err != nil {
		t.Fatal(err)
	}

	// should have more than 1 snapshot
	protectedSnapshots := getProtectedSnapshots(RollbackSamplingInterval, NumSnapshotsToKeep, persistedSnapshots)
	if len(protectedSnapshots) <= 1 {
		t.Fatalf("expected more than 1 protected snapshot, got %d", len(protectedSnapshots))
	}
}
================================================
FILE: index/scorch/scorch.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"encoding/json"
"fmt"
"os"
"path/filepath"
"sync"
"sync/atomic"
"time"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
bolt "go.etcd.io/bbolt"
)
// Name is the string identifier of this index implementation.
const Name = "scorch"
// Version is the value NewScorch stores in the version field of every
// Scorch it creates.
const Version uint8 = 2
// ErrClosed is an error carrying the message "scorch closed".
// NOTE(review): presumably returned by operations attempted on a closed
// index — confirm at the use sites, which are outside this section.
var ErrClosed = fmt.Errorf("scorch closed")
// Scorch holds the state of one scorch index: its configuration, the current
// root snapshot, the channels used to talk to the background loops started
// by Open, and the handle of the root bolt database.
type Scorch struct {
nextSegmentID uint64
stats Stats
iStats internalStats
// readOnly is taken from the "read_only" config entry in NewScorch.
readOnly bool
version uint8
config map[string]interface{}
segmentConfig map[string]interface{}
analysisQueue *index.AnalysisQueue
// path is taken from the "path" config entry in openBolt.
path string
// unsafeBatch is taken from the "unsafe_batch" config entry and is forced
// to true by openBolt when path is empty. When it is false, prepareSegment
// also waits on the introduction's persisted channel.
unsafeBatch bool
rootLock sync.RWMutex
root *IndexSnapshot // holds 1 ref-count on the root
rootPersisted []chan error // closed when root is persisted
persistedCallbacks []index.BatchCallback
nextSnapshotEpoch uint64
eligibleForRemoval []uint64 // Index snapshot epochs that are safe to GC.
ineligibleForRemoval map[string]bool // Filenames that should not be GC'ed yet.
// keeps track of segments scheduled for online copy/backup operation. Each segment's filename maps to
// the count of copy schedules. Segments with non-zero counts are protected from removal by the cleanup
// operation. Counts decrement upon successful copy, allowing removal of segments with zero or absent counts.
// must be accessed within the rootLock as it is accessed by the asynchronous cleanup routine.
copyScheduled map[string]int
// numSnapshotsToKeep, rollbackRetentionFactor and rollbackSamplingInterval
// are initialised in openBolt from the package-level defaults and may be
// overridden by config entries of the same (lower-camel-case) names.
numSnapshotsToKeep int
rollbackRetentionFactor float64
checkPoints []*snapshotMetaData
rollbackSamplingInterval time.Duration
// closeCh is closed by Close to tell the async tasks to stop.
closeCh chan struct{}
introductions chan *segmentIntroduction
persists chan *persistIntroduction
merges chan *segmentMerge
introducerNotifier chan *epochWatcher
persisterNotifier chan *epochWatcher
rootBolt *bolt.DB
// asyncTasks counts the background loops started by Open; Close waits on it.
asyncTasks sync.WaitGroup
// onEvent and onAsyncError are looked up in RegistryEventCallbacks and
// RegistryAsyncErrorCallbacks by NewScorch; either may be nil.
onEvent func(event Event) bool
onAsyncError func(err error, path string)
forceMergeRequestCh chan *mergerCtrl
segPlugin SegmentPlugin
spatialPlugin index.SpatialAnalyzerPlugin
}
// ScorchErrorType is a string-backed error value that names a category of
// scorch failure.
type ScorchErrorType string

// Error implements the error interface by returning the category text.
func (t ScorchErrorType) Error() string {
	text := string(t)
	return text
}

// ErrType values for ScorchError
const (
	ErrAsyncPanic   ScorchErrorType = "async panic error"
	ErrPersist      ScorchErrorType = "persist error"
	ErrCleanup      ScorchErrorType = "cleanup error"
	ErrOptionsParse ScorchErrorType = "options parse error"
)

// ScorchError is passed to onAsyncError when errors are
// fired from scorch background processes
type ScorchError struct {
	Source  string
	ErrMsg  string
	ErrType ScorchErrorType
}

// Error renders the error as "source: <Source>, <ErrType>: <ErrMsg>".
func (e *ScorchError) Error() string {
	origin, category, detail := e.Source, e.ErrType, e.ErrMsg
	return fmt.Sprintf("source: %s, %v: %s", origin, category, detail)
}

// Unwrap exposes the error's ErrType so that an onAsyncError function can
// tell categories apart with errors.Is(...) and handle them differently.
func (e *ScorchError) Unwrap() error {
	return e.ErrType
}

// NewScorchError builds a *ScorchError from the given source, message and
// category and returns it as an error.
func NewScorchError(source, errMsg string, errType ScorchErrorType) error {
	scorchErr := new(ScorchError)
	scorchErr.Source = source
	scorchErr.ErrMsg = errMsg
	scorchErr.ErrType = errType
	return scorchErr
}
// internalStats groups uint64 counters and gauges kept alongside the public
// Stats. The fields touched in this file (analysisBytesAdded/Removed,
// newSegBufBytesAdded/Removed, persistEpoch, persistSnapshotSize, mergeEpoch,
// mergeSnapshotSize) are read and written through sync/atomic.
type internalStats struct {
persistEpoch uint64
persistSnapshotSize uint64
mergeEpoch uint64
mergeSnapshotSize uint64
newSegBufBytesAdded uint64
newSegBufBytesRemoved uint64
analysisBytesAdded uint64
analysisBytesRemoved uint64
}
// NewScorch builds a Scorch from config and returns it as an index.Index.
// It does not open anything on disk; it only wires up in-memory state:
//
//   - an optional forced segment plugin (forceSegmentType/forceSegmentVersion),
//   - an optional spatial analyzer plugin ("spatialPlugin"),
//   - the "read_only" and "unsafe_batch" flags,
//   - event and async-error callbacks looked up by name in the registries.
//
// Persister and merge planner options are parsed once up front so that bad
// values are reported here rather than later from a background routine.
// The storeName argument is not read by this function.
func NewScorch(storeName string,
	config map[string]interface{},
	analysisQueue *index.AnalysisQueue,
) (index.Index, error) {
	rv := &Scorch{
		version:              Version,
		config:               config,
		segmentConfig:        make(map[string]interface{}),
		analysisQueue:        analysisQueue,
		nextSnapshotEpoch:    1,
		ineligibleForRemoval: map[string]bool{},
		copyScheduled:        map[string]int{},
		closeCh:              make(chan struct{}),
		forceMergeRequestCh:  make(chan *mergerCtrl, 1),
		segPlugin:            defaultSegmentPlugin,
	}

	segType, segVersion, err := configForceSegmentTypeVersion(config)
	if err != nil {
		return nil, err
	}
	if segType != "" && segVersion != 0 {
		if err := rv.loadSegmentPlugin(segType, uint32(segVersion)); err != nil {
			return nil, err
		}
	}

	if typ, ok := config["spatialPlugin"].(string); ok {
		if err := rv.loadSpatialAnalyzerPlugin(typ); err != nil {
			return nil, err
		}
	}

	rv.root = &IndexSnapshot{parent: rv, refs: 1, creator: "NewScorch"}

	if ro, ok := config["read_only"].(bool); ok {
		rv.readOnly = ro
	}
	if ub, ok := config["unsafe_batch"].(bool); ok {
		rv.unsafeBatch = ub
	}
	if ecbName, ok := config["eventCallbackName"].(string); ok {
		rv.onEvent = RegistryEventCallbacks[ecbName]
	}
	if aecbName, ok := config["asyncErrorCallbackName"].(string); ok {
		rv.onAsyncError = RegistryAsyncErrorCallbacks[aecbName]
	}

	// validate any custom persistor options to
	// prevent an async error in the persistor routine
	if _, err = rv.parsePersisterOptions(); err != nil {
		return nil, err
	}

	// validate any custom merge planner options to
	// prevent an async error in the merger routine
	if _, err = rv.parseMergePlannerOptions(); err != nil {
		return nil, err
	}

	return rv, nil
}
// configForceSegmentTypeVersion reports whether the caller asked for a
// specific segment type/version through the "forceSegmentType" and
// "forceSegmentVersion" config entries.
//
// When "forceSegmentVersion" cannot be parsed as an integer the function
// returns ("", 0, nil), i.e. nothing is forced. When the version parses but
// "forceSegmentType" is not a string, an error is returned.
func configForceSegmentTypeVersion(config map[string]interface{}) (string, uint32, error) {
	version, parseErr := parseToInteger(config["forceSegmentVersion"])
	if parseErr != nil {
		// An unparsable version is treated as "no version forced".
		return "", 0, nil
	}

	if segType, isString := config["forceSegmentType"].(string); isString {
		return segType, uint32(version), nil
	}

	return "", 0, fmt.Errorf(
		"forceSegmentVersion set to %d, must also specify forceSegmentType", version)
}
// NumEventsBlocking returns the number of event callbacks that have been
// started but have not completed yet, computed as the difference between the
// two trigger counters. The completed counter is loaded before the started
// counter.
func (s *Scorch) NumEventsBlocking() uint64 {
	completed := atomic.LoadUint64(&s.stats.TotEventTriggerCompleted)
	started := atomic.LoadUint64(&s.stats.TotEventTriggerStarted)
	inFlight := started - completed
	return inFlight
}
// fireEvent hands an Event of the given kind and duration to the registered
// onEvent callback and returns the callback's verdict. Without a callback it
// returns true. The started/completed trigger counters are bumped around the
// callback invocation.
func (s *Scorch) fireEvent(kind EventKind, dur time.Duration) bool {
	if s.onEvent == nil {
		return true
	}
	atomic.AddUint64(&s.stats.TotEventTriggerStarted, 1)
	verdict := s.onEvent(Event{Kind: kind, Scorch: s, Duration: dur})
	atomic.AddUint64(&s.stats.TotEventTriggerCompleted, 1)
	return verdict
}
// fireAsyncError forwards err, together with the index path, to the
// registered onAsyncError callback (if any) and then increments the
// TotOnErrors counter, whether or not a callback is registered.
func (s *Scorch) fireAsyncError(err error) {
	if handler := s.onAsyncError; handler != nil {
		handler(err, s.path)
	}
	atomic.AddUint64(&s.stats.TotOnErrors, 1)
}
// Open prepares the index for use. If no root bolt handle is held it runs
// openBolt first, then starts the introducer loop. The persister and merger
// loops are started only for a writable index with a non-empty path. Every
// started loop is registered with asyncTasks.
func (s *Scorch) Open() error {
	if s.rootBolt == nil {
		if err := s.openBolt(); err != nil {
			return err
		}
	}

	s.asyncTasks.Add(1)
	go s.introducerLoop()

	if s.readOnly || s.path == "" {
		return nil
	}

	s.asyncTasks.Add(1)
	go s.persisterLoop()
	s.asyncTasks.Add(1)
	go s.mergerLoop()
	return nil
}
// openBolt reads the index path from the config, opens the root bolt
// database when the path is non-empty, loads any persisted state, (re)creates
// the channels used by the background loops, and applies the rollback and
// spatial-plugin related config entries.
//
// Config entries read here: "path" (required string; empty means no on-disk
// store and forces unsafeBatch), "bolt_timeout" (duration string),
// "numSnapshotsToKeep", "rollbackSamplingInterval",
// "rollbackRetentionFactor" (must be a float64) and "spatialPlugin".
//
// It returns an error when the path is missing, a config value cannot be
// parsed, or any of the open/load/cleanup steps fails.
func (s *Scorch) openBolt() error {
	var ok bool
	s.path, ok = s.config["path"].(string)
	if !ok {
		return fmt.Errorf("must specify path")
	}
	if s.path == "" {
		s.unsafeBatch = true
	}

	rootBoltOpt := *bolt.DefaultOptions
	if s.readOnly {
		rootBoltOpt.ReadOnly = true
		rootBoltOpt.OpenFile = func(path string, flag int, mode os.FileMode) (*os.File, error) {
			// Bolt appends an O_CREATE flag regardless.
			// See - https://github.com/etcd-io/bbolt/blob/v1.3.5/db.go#L210
			// Use os.O_RDONLY only if path exists (#1623)
			if _, err := os.Stat(path); os.IsNotExist(err) {
				return os.OpenFile(path, flag, mode)
			}
			return os.OpenFile(path, os.O_RDONLY, mode)
		}
	} else {
		if s.path != "" {
			err := os.MkdirAll(s.path, 0o700)
			if err != nil {
				return err
			}
		}
	}

	if boltTimeoutStr, ok := s.config["bolt_timeout"].(string); ok {
		var err error
		boltTimeout, err := time.ParseDuration(boltTimeoutStr)
		if err != nil {
			return fmt.Errorf("invalid duration specified for bolt_timeout: %v", err)
		}
		rootBoltOpt.Timeout = boltTimeout
	}

	rootBoltPath := s.path + string(os.PathSeparator) + "root.bolt"
	var err error
	if s.path != "" {
		s.rootBolt, err = bolt.Open(rootBoltPath, 0o600, &rootBoltOpt)
		if err != nil {
			return err
		}

		// now see if there is any existing state to load
		err = s.loadFromBolt()
		if err != nil {
			_ = s.Close()
			return err
		}
	}

	atomic.StoreUint64(&s.stats.TotFileSegmentsAtRoot, uint64(len(s.root.segment)))

	s.introductions = make(chan *segmentIntroduction)
	s.persists = make(chan *persistIntroduction)
	s.merges = make(chan *segmentMerge)
	s.introducerNotifier = make(chan *epochWatcher, 1)
	s.persisterNotifier = make(chan *epochWatcher, 1)
	s.closeCh = make(chan struct{})
	s.forceMergeRequestCh = make(chan *mergerCtrl, 1)

	if !s.readOnly && s.path != "" {
		err := s.removeOldZapFiles() // Before persister or merger create any new files.
		if err != nil {
			_ = s.Close()
			return err
		}
	}

	s.numSnapshotsToKeep = NumSnapshotsToKeep
	if v, ok := s.config["numSnapshotsToKeep"]; ok {
		var t int
		if t, err = parseToInteger(v); err != nil {
			return fmt.Errorf("numSnapshotsToKeep parse err: %v", err)
		}
		// Non-positive values are ignored and the default stays in place.
		if t > 0 {
			s.numSnapshotsToKeep = t
		}
	}

	s.rollbackSamplingInterval = RollbackSamplingInterval
	if v, ok := s.config["rollbackSamplingInterval"]; ok {
		var t time.Duration
		if t, err = parseToTimeDuration(v); err != nil {
			return fmt.Errorf("rollbackSamplingInterval parse err: %v", err)
		}
		s.rollbackSamplingInterval = t
	}

	s.rollbackRetentionFactor = RollbackRetentionFactor
	if v, ok := s.config["rollbackRetentionFactor"]; ok {
		// Reject values that are NOT float64. The previous check was
		// inverted: it failed on valid float64 values and silently stored 0
		// for everything else.
		r, isFloat := v.(float64)
		if !isFloat {
			return fmt.Errorf("rollbackRetentionFactor parse err: expected float64, got %T", v)
		}
		s.rollbackRetentionFactor = r
	}

	typ, ok := s.config["spatialPlugin"].(string)
	if ok {
		if err := s.loadSpatialAnalyzerPlugin(typ); err != nil {
			return err
		}
	}

	return nil
}
// Close shuts the index down: it fires EventKindCloseStart, closes closeCh
// to signal the async tasks, waits for them to finish, and then — if a root
// bolt handle is held — closes it, drops the reference on the root snapshot
// and clears both fields. EventKindClose is fired on the way out with the
// elapsed time. The bolt close error takes precedence over the DecRef error.
func (s *Scorch) Close() (err error) {
	began := time.Now()
	defer func() {
		s.fireEvent(EventKindClose, time.Since(began))
	}()

	s.fireEvent(EventKindCloseStart, 0)

	// tell the async tasks to stop, then wait until they have
	close(s.closeCh)
	s.asyncTasks.Wait()

	if s.rootBolt == nil {
		return nil
	}

	// now close the root bolt
	err = s.rootBolt.Close()

	s.rootLock.Lock()
	if s.root != nil {
		if decErr := s.root.DecRef(); err == nil {
			err = decErr
		}
	}
	s.root = nil
	s.rootBolt = nil
	s.rootLock.Unlock()

	return err
}
// Update indexes doc by wrapping it in a single-operation batch and
// submitting that batch through Batch.
func (s *Scorch) Update(doc index.Document) error {
	single := index.NewBatch()
	single.Update(doc)
	return s.Batch(single)
}
// Delete removes the document with the given id by submitting a
// single-operation batch through Batch.
func (s *Scorch) Delete(id string) error {
	single := index.NewBatch()
	single.Delete(id)
	return s.Batch(single)
}
// Batch applies a batch of changes to the index atomically.
//
// Documents to update are analyzed on the analysis queue, turned into a new
// segment by the segment plugin, and handed to prepareSegment together with
// all affected document ids (updates and deletes) and the batch's internal
// operations. The returned error is the one from segment creation or from
// prepareSegment.
func (s *Scorch) Batch(batch *index.Batch) (err error) {
start := time.Now()
// notify handlers that we're about to index a batch of data
s.fireEvent(EventKindBatchIntroductionStart, 0)
defer func() {
s.fireEvent(EventKindBatchIntroduction, time.Since(start))
}()
// Buffered to the number of operations so analysis workers never block on send.
resultChan := make(chan index.Document, len(batch.IndexOps))
var numUpdates uint64
var numDeletes uint64
var numPlainTextBytes uint64
var ids []string
// A nil document in IndexOps denotes a delete; every id is collected either way.
for docID, doc := range batch.IndexOps {
if doc != nil {
// insert _id field
doc.AddIDField()
numUpdates++
numPlainTextBytes += doc.NumPlainTextBytes()
} else {
numDeletes++
}
ids = append(ids, docID)
}
// FIXME could sort ids list concurrent with analysis?
if numUpdates > 0 {
// Queue the analysis work from a separate goroutine so this goroutine
// can start draining resultChan right away.
go func() {
for k := range batch.IndexOps {
doc := batch.IndexOps[k]
if doc != nil {
// put the work on the queue
s.analysisQueue.Queue(func() {
analyze(doc, s.setSpatialAnalyzerPlugin)
resultChan <- doc
})
}
}
}()
}
// wait for analysis result
analysisResults := make([]index.Document, int(numUpdates))
var itemsDeQueued uint64
var totalAnalysisSize int
for itemsDeQueued < numUpdates {
result := <-resultChan
resultSize := result.Size()
// check if the document is searchable by the index
// NOTE(review): the counter name suggests "filtered" documents, yet it is
// incremented when Indexed() reports true — confirm the intended polarity
// against the definition of Indexed().
if result.Indexed() {
atomic.AddUint64(&s.stats.TotMutationsFiltered, 1)
}
atomic.AddUint64(&s.iStats.analysisBytesAdded, uint64(resultSize))
totalAnalysisSize += resultSize
analysisResults[itemsDeQueued] = result
itemsDeQueued++
}
// All numUpdates results have been received, so no sender remains.
close(resultChan)
// Balance analysisBytesAdded once the batch is done.
defer atomic.AddUint64(&s.iStats.analysisBytesRemoved, uint64(totalAnalysisSize))
atomic.AddUint64(&s.stats.TotAnalysisTime, uint64(time.Since(start)))
indexStart := time.Now()
var newSegment segment.Segment
var bufBytes uint64
stats := newFieldStats()
if len(analysisResults) > 0 {
newSegment, bufBytes, err = s.segPlugin.NewUsing(analysisResults, s.segmentConfig)
if err != nil {
return err
}
if segB, ok := newSegment.(segment.DiskStatsReporter); ok {
atomic.AddUint64(&s.stats.TotBytesWrittenAtIndexTime,
segB.BytesWritten())
}
atomic.AddUint64(&s.iStats.newSegBufBytesAdded, bufBytes)
if fsr, ok := newSegment.(segment.FieldStatsReporter); ok {
fsr.UpdateFieldStats(stats)
}
} else {
// No updates in this batch (deletes and/or internal ops only).
atomic.AddUint64(&s.stats.TotBatchesEmpty, 1)
}
err = s.prepareSegment(newSegment, ids, batch.InternalOps, batch.PersistedCallback(), stats)
if err != nil {
// The segment was not accepted; release it and count the failure.
if newSegment != nil {
_ = newSegment.Close()
}
atomic.AddUint64(&s.stats.TotOnErrors, 1)
} else {
atomic.AddUint64(&s.stats.TotUpdates, numUpdates)
atomic.AddUint64(&s.stats.TotDeletes, numDeletes)
atomic.AddUint64(&s.stats.TotBatches, 1)
atomic.AddUint64(&s.stats.TotIndexedPlainTextBytes, numPlainTextBytes)
}
atomic.AddUint64(&s.iStats.newSegBufBytesRemoved, bufBytes)
atomic.AddUint64(&s.stats.TotIndexTime, uint64(time.Since(indexStart)))
return err
}
// prepareSegment wraps newSegment (which may be nil), the affected document
// ids and the internal operations in a segmentIntroduction, sends it on the
// introductions channel and blocks until it has been applied. Unless
// unsafeBatch is set, it additionally waits for the result delivered on the
// introduction's persisted channel. It returns the first error encountered:
// from computing obsoleted doc numbers, from the applied channel, or from
// the persisted channel.
func (s *Scorch) prepareSegment(newSegment segment.Segment, ids []string,
internalOps map[string][]byte, persistedCallback index.BatchCallback, stats *fieldStats,
) error {
// new introduction
introduction := &segmentIntroduction{
id: atomic.AddUint64(&s.nextSegmentID, 1),
data: newSegment,
ids: ids,
internal: internalOps,
stats: stats,
applied: make(chan error),
persistedCallback: persistedCallback,
}
// The persisted channel only exists for safe batches; a nil channel below
// means "do not wait for persistence".
if !s.unsafeBatch {
introduction.persisted = make(chan error, 1)
}
// optimistically prepare obsoletes outside of rootLock
// Take a reference on the current root under the read lock so it stays
// usable after the lock is released; the reference is dropped on return.
s.rootLock.RLock()
root := s.root
root.AddRef()
s.rootLock.RUnlock()
defer func() { _ = root.DecRef() }()
// For every segment of that root, record the doc numbers matching ids.
introduction.obsoletes = make(map[uint64]*roaring.Bitmap, len(root.segment))
for _, seg := range root.segment {
delta, err := seg.segment.DocNumbers(ids)
if err != nil {
return err
}
introduction.obsoletes[seg.id] = delta
}
introStartTime := time.Now()
s.introductions <- introduction
// block until this segment is applied
err := <-introduction.applied
if err != nil {
return err
}
if introduction.persisted != nil {
err = <-introduction.persisted
}
introTime := uint64(time.Since(introStartTime))
atomic.AddUint64(&s.stats.TotBatchIntroTime, introTime)
// Load-then-store without a compare-and-swap: concurrent batches may
// overwrite each other's maximum.
if atomic.LoadUint64(&s.stats.MaxBatchIntroTime) < introTime {
atomic.StoreUint64(&s.stats.MaxBatchIntroTime, introTime)
}
return err
}
// SetInternal stores val under the internal key by submitting a
// single-operation batch through Batch.
func (s *Scorch) SetInternal(key, val []byte) error {
	single := index.NewBatch()
	single.SetInternal(key, val)
	return s.Batch(single)
}
// DeleteInternal removes the internal key by submitting a single-operation
// batch through Batch.
func (s *Scorch) DeleteInternal(key []byte) error {
	single := index.NewBatch()
	single.DeleteInternal(key)
	return s.Batch(single)
}
// Reader returns a low-level accessor on the index data: the current root
// snapshot with an extra reference taken on it. Close the reader to release
// associated resources. The error result is always nil. When the index has
// no root snapshot, the returned interface wraps a nil *IndexSnapshot.
func (s *Scorch) Reader() (index.IndexReader, error) {
	snapshot := s.currentSnapshot()
	return snapshot, nil
}
// currentSnapshot returns the root snapshot after taking an additional
// reference on it under the root read lock, or nil when there is no root.
// The caller owns the added reference.
func (s *Scorch) currentSnapshot() *IndexSnapshot {
	s.rootLock.RLock()
	defer s.rootLock.RUnlock()

	snapshot := s.root
	if snapshot == nil {
		return nil
	}
	snapshot.AddRef()
	return snapshot
}
// Stats exposes the index's stats structure as a json.Marshaler. The
// returned value points at the live structure, not at a copy.
func (s *Scorch) Stats() json.Marshaler {
	live := &s.stats
	return live
}
// BytesReadQueryTime returns the current value of the
// TotBytesReadAtQueryTime counter.
//
// The value is read with an atomic load, matching how the other stats
// counters in this file are accessed; a plain read would race with
// concurrent atomic updates of the counter.
// NOTE(review): the writers of this counter are outside this section —
// presumably they use atomic.AddUint64 like the other stats fields.
func (s *Scorch) BytesReadQueryTime() uint64 {
	return atomic.LoadUint64(&s.stats.TotBytesReadAtQueryTime)
}
// diskFileStats scans the index directory (non-recursively, skipping
// sub-directories) and returns, in order:
//
//  1. the number of files found,
//  2. the total size of those files in bytes,
//  3. the size in bytes of the files whose full path is a key of
//     rootSegmentPaths — or, when rootSegmentPaths is nil, the same total
//     as (2).
//
// Directory or file-info errors are not reported; the affected entries are
// simply left out of the counts. An empty index path yields all zeros.
func (s *Scorch) diskFileStats(rootSegmentPaths map[string]struct{}) (uint64,
	uint64, uint64,
) {
	var fileCount, totalBytes, rootBytes uint64

	if s.path != "" {
		if entries, err := os.ReadDir(s.path); err == nil {
			for _, entry := range entries {
				if entry.IsDir() {
					continue
				}
				info, err := entry.Info()
				if err != nil {
					continue
				}
				size := uint64(info.Size())
				totalBytes += size
				fileCount++

				if rootSegmentPaths == nil {
					continue
				}
				fullName := s.path + string(os.PathSeparator) + info.Name()
				if _, atRoot := rootSegmentPaths[fullName]; atRoot {
					rootBytes += size
				}
			}
		}
	}

	// if no root files path given, then consider all disk files.
	if rootSegmentPaths == nil {
		return fileCount, totalBytes, totalBytes
	}
	return fileCount, totalBytes, rootBytes
}
// StatsMap returns the index statistics as a map. It starts from the map
// form of the stats structure and adds: disk usage figures for the index
// directory, backwards compatible aliases for a number of counters, the
// "index_bgthreads_active" flag, and one "field:<field>:<stat>" entry per
// aggregated field statistic of the current snapshot's segments.
// It returns nil when the index has no current snapshot.
func (s *Scorch) StatsMap() map[string]interface{} {
	m := s.stats.ToMap()

	snapshot := s.currentSnapshot()
	if snapshot == nil {
		return nil
	}
	defer func() {
		_ = snapshot.Close()
	}()

	rootSegPaths := snapshot.diskSegmentsPaths()

	s.rootLock.RLock()
	m["CurFilesIneligibleForRemoval"] = uint64(len(s.ineligibleForRemoval))
	s.rootLock.RUnlock()

	filesOnDisk, bytesOnDisk, bytesOnDiskByRoot := s.diskFileStats(rootSegPaths)

	m["CurOnDiskBytes"] = bytesOnDisk
	m["CurOnDiskFiles"] = filesOnDisk

	// TODO: consider one day removing these backwards compatible
	// names for apps using the old names
	aliases := [...]struct{ alias, source string }{
		{"updates", "TotUpdates"},
		{"deletes", "TotDeletes"},
		{"batches", "TotBatches"},
		{"errors", "TotOnErrors"},
		{"analysis_time", "TotAnalysisTime"},
		{"index_time", "TotIndexTime"},
		{"term_searchers_started", "TotTermSearchersStarted"},
		{"term_searchers_finished", "TotTermSearchersFinished"},
		{"knn_searches", "TotKNNSearches"},
		{"synonym_searches", "TotSynonymSearches"},
		{"total_mutations_filtered", "TotMutationsFiltered"},
		{"num_bytes_read_at_query_time", "TotBytesReadAtQueryTime"},
		{"num_plain_text_bytes_indexed", "TotIndexedPlainTextBytes"},
		{"num_bytes_written_at_index_time", "TotBytesWrittenAtIndexTime"},
		{"num_items_introduced", "TotIntroducedItems"},
		{"num_items_persisted", "TotPersistedItems"},
		{"num_recs_to_persist", "TotItemsToPersist"},
		{"num_root_memorysegments", "TotMemorySegmentsAtRoot"},
		{"num_root_filesegments", "TotFileSegmentsAtRoot"},
		{"num_persister_nap_pause_completed", "TotPersisterNapPauseCompleted"},
		{"num_persister_nap_merger_break", "TotPersisterMergerNapBreak"},
		{"total_compaction_written_bytes", "TotFileMergeWrittenBytes"},
	}
	for _, a := range aliases {
		m[a.alias] = m[a.source]
	}

	// total disk bytes found in index directory inclusive of older snapshots
	m["num_bytes_used_disk"] = bytesOnDisk
	// total disk bytes by the latest root index, exclusive of older snapshots
	m["num_bytes_used_disk_by_root"] = bytesOnDiskByRoot
	// num_bytes_used_disk_by_root_reclaimable is an approximation about the
	// reclaimable disk space in an index. (eg: from a full compaction)
	reclaimable := float64(bytesOnDiskByRoot) * snapshot.reClaimableDocsRatio()
	m["num_bytes_used_disk_by_root_reclaimable"] = uint64(reclaimable)
	m["num_files_on_disk"] = filesOnDisk

	// the bool stat `index_bgthreads_active` indicates whether the background routines
	// (which are responsible for the index to attain a steady state) are still
	// doing some work. It is only set when all three epochs are available.
	rootEpoch, haveRoot := m["CurRootEpoch"].(uint64)
	mergedEpoch, haveMerged := m["LastMergedEpoch"].(uint64)
	persistedEpoch, havePersisted := m["LastPersistedEpoch"].(uint64)
	if haveRoot && haveMerged && havePersisted {
		m["index_bgthreads_active"] = mergedEpoch != rootEpoch || persistedEpoch != rootEpoch
	}

	// calculate the aggregate of all the segment's field stats
	aggregated := newFieldStats()
	for _, segSnapshot := range snapshot.Segments() {
		if segSnapshot.stats == nil {
			continue
		}
		aggregated.Aggregate(segSnapshot.stats)
	}
	for statName, perField := range aggregated.Fetch() {
		for fieldName, val := range perField {
			m["field:"+fieldName+":"+statName] = val
		}
	}

	return m
}
// Analyze runs field analysis on d, passing setSpatialAnalyzerPlugin as the
// per-field initialisation hook.
func (s *Scorch) Analyze(d index.Document) {
	hook := s.setSpatialAnalyzerPlugin
	analyze(d, hook)
}
// customAnalyzerPluginInitFunc is a hook that analyze invokes on every
// indexed field just before the field itself is analyzed.
type customAnalyzerPluginInitFunc func(field index.Field)
// setSpatialAnalyzerPlugin hands the index's spatial analyzer plugin to f
// when f is a custom tokenizable spatial field, so that the plugin overrides
// the tokenisation during the analysis stage. Nothing happens when the index
// has no segment plugin or f is any other kind of field.
func (s *Scorch) setSpatialAnalyzerPlugin(f index.Field) {
	if s.segPlugin == nil {
		return
	}
	spatialField, ok := f.(index.TokenizableSpatialField)
	if !ok {
		return
	}
	spatialField.SetSpatialAnalyzerPlugin(s.spatialPlugin)
}
// analyze analyzes every indexed field of d. For each such field it first
// calls fn (when non-nil), then the field's own Analyze. If the document has
// composite fields, the results of every field other than "_id" are composed
// into each of them. Nested documents, when d provides any, get an _id field
// and are analyzed recursively with the same fn.
func analyze(d index.Document, fn customAnalyzerPluginInitFunc) {
	d.VisitFields(func(field index.Field) {
		if !field.Options().IsIndexed() {
			return
		}
		if fn != nil {
			fn(field)
		}
		field.Analyze()

		if !d.HasComposite() || field.Name() == "_id" {
			return
		}

		// see if any of the composite fields need this
		d.VisitComposite(func(cf index.CompositeField) {
			cf.Compose(field.Name(), field.AnalyzedLength(), field.AnalyzedTokenFrequencies())
		})

		// Since the encoded geoShape is only necessary within the doc values
		// of the geoShapeField, it has been removed from the field's term dictionary.
		// However, '_all' field uses its term dictionary as its docValues, so it
		// becomes necessary to add the geoShape into the '_all' field's term dictionary
		shapeField, isShape := field.(index.GeoShapeField)
		if !isShape {
			return
		}
		d.VisitComposite(func(cf index.CompositeField) {
			encoded := shapeField.EncodedShape()
			location := &index.TokenLocation{
				Start:    0,
				End:      len(encoded),
				Position: 1,
			}
			cf.Compose(field.Name(), 1, index.TokenFrequencies{
				string(encoded): &index.TokenFreq{
					Term:      encoded,
					Locations: []*index.TokenLocation{location},
				},
			})
		})
	})

	if nested, ok := d.(index.NestedDocument); ok {
		nested.VisitNestedDocuments(func(doc index.Document) {
			doc.AddIDField()
			analyze(doc, fn)
		})
	}
}
// AddEligibleForRemoval records epoch in the eligibleForRemoval list, unless
// epoch is the epoch of the current root snapshot. The root lock is held for
// the whole check-and-append.
func (s *Scorch) AddEligibleForRemoval(epoch uint64) {
	s.rootLock.Lock()
	defer s.rootLock.Unlock()

	isCurrentRoot := s.root != nil && s.root.epoch == epoch
	if isCurrentRoot {
		return
	}
	s.eligibleForRemoval = append(s.eligibleForRemoval, epoch)
}
// MemoryUsed sums up the memory accounted to the index: the size of the
// current snapshot, the sizes of the snapshots the persister and the merger
// are working on (each only when its epoch is non-zero and older than the
// current snapshot's), plus the outstanding new-segment-buffer and analysis
// bytes (added minus removed). It reports 0 when there is no current snapshot.
func (s *Scorch) MemoryUsed() (memUsed uint64) {
	snap := s.currentSnapshot()
	if snap == nil {
		return 0
	}
	defer func() {
		_ = snap.Close()
	}()

	// overhead of the current root snapshot
	memUsed = uint64(snap.Size())

	// snapshot the persister may be working on; counted only when it is not
	// the same as the current snapshot
	persisterEpoch := atomic.LoadUint64(&s.iStats.persistEpoch)
	persisterBytes := atomic.LoadUint64(&s.iStats.persistSnapshotSize)
	if persisterEpoch != 0 && persisterEpoch < snap.epoch {
		memUsed += persisterBytes
	}

	// snapshot the merger may be working on; counted only when it is not
	// the same as the current snapshot
	mergerEpoch := atomic.LoadUint64(&s.iStats.mergeEpoch)
	mergerBytes := atomic.LoadUint64(&s.iStats.mergeSnapshotSize)
	if mergerEpoch != 0 && mergerEpoch < snap.epoch {
		memUsed += mergerBytes
	}

	segBufAdded := atomic.LoadUint64(&s.iStats.newSegBufBytesAdded)
	segBufRemoved := atomic.LoadUint64(&s.iStats.newSegBufBytesRemoved)
	memUsed += segBufAdded - segBufRemoved

	analysisAdded := atomic.LoadUint64(&s.iStats.analysisBytesAdded)
	analysisRemoved := atomic.LoadUint64(&s.iStats.analysisBytesRemoved)
	memUsed += analysisAdded - analysisRemoved

	return memUsed
}
// markIneligibleForRemoval flags filename in the ineligibleForRemoval set
// while holding the root lock.
func (s *Scorch) markIneligibleForRemoval(filename string) {
	s.rootLock.Lock()
	defer s.rootLock.Unlock()

	s.ineligibleForRemoval[filename] = true
}
// unmarkIneligibleForRemoval drops filename from the ineligibleForRemoval set
// while holding the root lock.
func (s *Scorch) unmarkIneligibleForRemoval(filename string) {
	s.rootLock.Lock()
	defer s.rootLock.Unlock()

	delete(s.ineligibleForRemoval, filename)
}
// init registers the scorch index type under Name with the registry and
// panics if the registration is rejected.
func init() {
	if err := registry.RegisterIndexType(Name, NewScorch); err != nil {
		panic(err)
	}
}
// parseToTimeDuration converts i to a time.Duration. Only string values are
// accepted; they are parsed with time.ParseDuration. Any other dynamic type
// (including nil) yields an error.
func parseToTimeDuration(i interface{}) (time.Duration, error) {
	text, isString := i.(string)
	if !isString {
		return 0, fmt.Errorf("expects a duration string")
	}
	return time.ParseDuration(text)
}
// parseToInteger converts i to an int. A float64 is converted with int(v)
// (truncating toward zero), an int is returned unchanged, and every other
// dynamic type yields an error.
func parseToInteger(i interface{}) (int, error) {
	if f, isFloat := i.(float64); isFloat {
		return int(f), nil
	}
	if n, isInt := i.(int); isInt {
		return n, nil
	}
	return 0, fmt.Errorf("expects int or float64 value")
}
// fieldStats holds Zap's field level stats at a segment level. Values are
// kept in a two-level map; see Store, Aggregate and Fetch for how it is
// written, merged and read.
type fieldStats struct {
// StatName -> FieldName -> value
statMap map[string]map[string]uint64
}
// Store records value for the (statName, fieldName) pair, creating the inner
// map for statName on first use and overwriting any value already stored for
// that pair.
func (fs *fieldStats) Store(statName, fieldName string, value uint64) {
	perField, ok := fs.statMap[statName]
	if !ok {
		perField = make(map[string]uint64)
		fs.statMap[statName] = perField
	}
	perField[fieldName] = value
}
// Aggregate adds every value found in stats to the matching
// (statName, fieldName) entry of fs, creating entries that do not exist yet.
// It does nothing when stats.Fetch() returns nil.
func (fs *fieldStats) Aggregate(stats segment.FieldStats) {
	incoming := stats.Fetch()
	if incoming == nil {
		return
	}
	for statName, perField := range incoming {
		totals, ok := fs.statMap[statName]
		if !ok {
			totals = make(map[string]uint64)
			fs.statMap[statName] = totals
		}
		for fieldName, val := range perField {
			// a missing entry reads as zero, so += also covers first-time fields
			totals[fieldName] += val
		}
	}
}
// Fetch returns the underlying StatName -> FieldName -> value map (not a
// copy), or nil when called on a nil receiver.
func (fs *fieldStats) Fetch() map[string]map[string]uint64 {
	if fs != nil {
		return fs.statMap
	}
	return nil
}
// newFieldStats returns a fieldStats whose stats map is allocated and empty.
func newFieldStats() *fieldStats {
	return &fieldStats{
		statMap: make(map[string]map[string]uint64),
	}
}
// CopyReader returns a low-level accessor for index data, ensuring persisted segments
// remain on disk for backup, preventing race conditions with the persister/merger cleanup.
// Close the reader after backup to allow segment removal by the persister/merger.
//
// It returns a nil interface value when the index has no root snapshot, so
// callers can reliably test the result with `== nil`.
func (s *Scorch) CopyReader() index.CopyReader {
	s.rootLock.Lock()
	defer s.rootLock.Unlock()

	rv := s.root
	if rv == nil {
		// Return a literal nil: returning the nil pointer itself would wrap it
		// in a non-nil interface value and defeat the caller's nil check.
		return nil
	}
	rv.AddRef()

	// Schedule a backup for all the segments from the root. Both the
	// unpersisted and the persisted segments are scheduled, because an
	// unpersisted segment may get persisted while the backup is running and
	// must then be protected from removal by the cleanup routine as well.
	for _, seg := range rv.segment {
		var fileName string
		if perSeg, ok := seg.segment.(segment.PersistedSegment); ok {
			// segment is persisted
			fileName = filepath.Base(perSeg.Path())
		} else {
			// segment is not persisted: use the name of the segment file that
			// is generated if the segment is persisted in the future
			fileName = zapFileName(seg.id)
		}
		rv.parent.copyScheduled[fileName]++
	}
	return rv
}
// FireIndexEvent is an external API that lets code outside of scorch fire the
// EventKindIndexStart event. It passes 0 as the second argument to fireEvent.
func (s *Scorch) FireIndexEvent() {
s.fireEvent(EventKindIndexStart, 0)
}
// UpdateFields persists fieldInfo and mappingBytes to bolt via updateBolt
// (existing field info already in bolt is merged, the index mapping is
// overwritten) and, once that succeeded, passes fieldInfo on to the root
// snapshot. A bolt failure is returned as-is and leaves the root untouched.
func (s *Scorch) UpdateFields(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error {
	if err := s.updateBolt(fieldInfo, mappingBytes); err != nil {
		return err
	}

	// propagate the update field info to all snapshots and segment bases
	s.root.UpdateFieldsInfo(fieldInfo)
	return nil
}
// OpenMeta opens the root bolt store through openBolt unless rootBolt is
// already set, in which case it is a no-op.
func (s *Scorch) OpenMeta() error {
	if s.rootBolt != nil {
		return nil
	}
	return s.openBolt()
}
// updateBolt merges fieldInfo into the updated-field info stored for the
// segments of every snapshot in bolt and rewrites the index mapping kept in
// each snapshot's internal bucket with mappingBytes. Everything is done inside
// one rootBolt.Update call.
//
// For a field that already has stored info, the Deleted, Store, DocValues and
// Index flags are OR-ed with the incoming ones; other fields are added as-is.
// Snapshot keys whose epoch cannot be decoded, and keys that do not refer to a
// nested bucket, are skipped. It returns nil when there is no snapshots
// bucket, and an error when a bucket inside a snapshot is missing, stored
// field info cannot be decoded, or a write fails.
func (s *Scorch) updateBolt(fieldInfo map[string]*index.UpdateFieldInfo, mappingBytes []byte) error {
	return s.rootBolt.Update(func(tx *bolt.Tx) error {
		snapshots := tx.Bucket(util.BoltSnapshotsBucket)
		if snapshots == nil {
			return nil
		}

		c := snapshots.Cursor()
		for k, _ := c.Last(); k != nil; k, _ = c.Prev() {
			_, _, err := decodeUvarintAscending(k)
			if err != nil {
				fmt.Printf("unable to parse segment epoch %x, continuing", k)
				continue
			}
			snapshot := snapshots.Bucket(k)
			if snapshot == nil {
				// the key does not name a nested bucket; calling Cursor on a
				// nil bucket would panic, so there is nothing to update here
				continue
			}
			cc := snapshot.Cursor()
			for kk, _ := cc.First(); kk != nil; kk, _ = cc.Next() {
				if kk[0] == util.BoltInternalKey[0] {
					// internal bucket: overwrite the stored index mapping
					internalBucket := snapshot.Bucket(kk)
					if internalBucket == nil {
						return fmt.Errorf("segment key, but bucket missing %x", kk)
					}
					err = internalBucket.Put(util.MappingInternalKey, mappingBytes)
					if err != nil {
						return err
					}
				} else if kk[0] != util.BoltMetaDataKey[0] {
					// segment bucket: merge the updated field info
					segmentBucket := snapshot.Bucket(kk)
					if segmentBucket == nil {
						return fmt.Errorf("segment key, but bucket missing %x", kk)
					}
					var updatedFields map[string]*index.UpdateFieldInfo
					updatedFieldBytes := segmentBucket.Get(util.BoltUpdatedFieldsKey)
					if updatedFieldBytes != nil {
						err := json.Unmarshal(updatedFieldBytes, &updatedFields)
						if err != nil {
							return fmt.Errorf("error reading updated field bytes: %v", err)
						}
						if updatedFields == nil {
							// stored value was JSON null (what a nil map
							// marshals to); allocate so the merge below does
							// not write into a nil map
							updatedFields = make(map[string]*index.UpdateFieldInfo, len(fieldInfo))
						}
						for field, info := range fieldInfo {
							if val, ok := updatedFields[field]; ok {
								updatedFields[field] = &index.UpdateFieldInfo{
									Deleted:   info.Deleted || val.Deleted,
									Store:     info.Store || val.Store,
									DocValues: info.DocValues || val.DocValues,
									Index:     info.Index || val.Index,
								}
							} else {
								updatedFields[field] = info
							}
						}
					} else {
						// nothing stored yet: the incoming info is the result
						updatedFields = fieldInfo
					}
					b, err := json.Marshal(updatedFields)
					if err != nil {
						return err
					}
					err = segmentBucket.Put(util.BoltUpdatedFieldsKey, b)
					if err != nil {
						return err
					}
				}
			}
		}
		return nil
	})
}
================================================
FILE: index/scorch/scorch_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"context"
"encoding/json"
"fmt"
"log"
"math/rand"
"os"
"path/filepath"
"reflect"
"regexp"
"strconv"
"sync"
"sync/atomic"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
regexpTokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/regexp"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
"github.com/blevesearch/bleve/v2/mapping"
index "github.com/blevesearch/bleve_index_api"
)
// init sets DefaultPersisterNapTimeMSec to 1 for the tests in this package
// (presumably milliseconds, going by the variable's name).
func init() {
// override for tests
DefaultPersisterNapTimeMSec = 1
}
// InitTest removes whatever exists at cfg["path"] so a test starts from a
// clean directory. It panics if cfg["path"] is not a string.
func InitTest(cfg map[string]interface{}) error {
	dir := cfg["path"].(string)
	return os.RemoveAll(dir)
}
// DestroyTest removes the directory at cfg["path"] once a test is done. It
// panics if cfg["path"] is not a string.
func DestroyTest(cfg map[string]interface{}) error {
	dir := cfg["path"].(string)
	return os.RemoveAll(dir)
}
// CreateConfig returns a scorch test configuration whose "path" entry points
// at a directory named "bleve-scorch-test-<name>" inside the OS temporary
// directory. The path is built with filepath.Join so that it uses the
// platform's separator and stays clean even when the temporary directory is
// reported with a trailing separator.
func CreateConfig(name string) map[string]interface{} {
	// TODO: Use t.Name() when Go 1.7 support terminates.
	return map[string]interface{}{
		"path": filepath.Join(os.TempDir(), "bleve-scorch-test-"+name),
	}
}
// testAnalyzer is a DefaultAnalyzer that only sets a tokenizer: a regexp
// tokenizer built from the pattern `\w+`.
var testAnalyzer = &analysis.DefaultAnalyzer{
Tokenizer: regexpTokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// TestIndexOpenReopen verifies that a document written to an index is still
// counted after the index has been closed and a new Scorch instance has been
// opened on the same path.
func TestIndexOpenReopen(t *testing.T) {
	cfg := CreateConfig("TestIndexOpenReopen")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}

	// checkDocCount opens a fresh reader on whatever idx currently refers to,
	// compares its document count with want and closes the reader again.
	checkDocCount := func(want uint64) {
		t.Helper()
		reader, err := idx.Reader()
		if err != nil {
			t.Fatal(err)
		}
		docCount, err := reader.DocCount()
		if err != nil {
			t.Error(err)
		}
		if docCount != want {
			t.Errorf("Expected document count to be %d got %d", want, docCount)
		}
		err = reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}

	var expectedCount uint64
	checkDocCount(expectedCount)

	// insert a doc
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++
	checkDocCount(expectedCount)

	// now close it
	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	idx, err = NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}

	// check the doc count again after reopening it
	checkDocCount(expectedCount)

	// now close it
	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexOpenReopenWithInsert verifies that the same Scorch instance can be
// closed, opened again and then accept further inserts, with the document
// count reflecting the documents written before and after the reopen.
func TestIndexOpenReopenWithInsert(t *testing.T) {
	// use a directory of its own rather than sharing TestIndexOpenReopen's
	cfg := CreateConfig("TestIndexOpenReopenWithInsert")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}

	// checkDocCount opens a fresh reader, compares its document count with
	// want and closes the reader again.
	checkDocCount := func(want uint64) {
		t.Helper()
		reader, err := idx.Reader()
		if err != nil {
			t.Fatal(err)
		}
		docCount, err := reader.DocCount()
		if err != nil {
			t.Error(err)
		}
		if docCount != want {
			t.Errorf("Expected document count to be %d got %d", want, docCount)
		}
		err = reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}

	var expectedCount uint64
	checkDocCount(expectedCount)

	// insert a doc
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++
	checkDocCount(expectedCount)

	// now close it
	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	// try to open the index and insert data
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}

	// insert a doc
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	// check the doc count again after reopening it
	checkDocCount(expectedCount)

	// now close it
	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexInsert checks that an empty index reports zero documents and that
// the count becomes one after a single document has been inserted.
func TestIndexInsert(t *testing.T) {
	cfg := CreateConfig("TestIndexInsert")
	if err := InitTest(cfg); err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := DestroyTest(cfg); err != nil {
			t.Log(err)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// verifyCount reads the document count through a short-lived reader.
	verifyCount := func(want uint64) {
		t.Helper()
		r, err := idx.Reader()
		if err != nil {
			t.Fatal(err)
		}
		got, err := r.DocCount()
		if err != nil {
			t.Error(err)
		}
		if got != want {
			t.Errorf("Expected document count to be %d got %d", want, got)
		}
		if err = r.Close(); err != nil {
			t.Fatal(err)
		}
	}

	var expectedCount uint64
	verifyCount(expectedCount)

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	if err = idx.Update(doc); err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	verifyCount(expectedCount)
}
// TestIndexInsertThenDelete inserts two documents, deletes the first, reopens
// the index and deletes the second, checking after every step the document
// count and whether each document is still reachable through InternalID and
// Document.
func TestIndexInsertThenDelete(t *testing.T) {
	cfg := CreateConfig("TestIndexInsertThenDelete")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	// idx is reassigned when the index is reopened below; the closure closes
	// whichever instance idx refers to when the test returns
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// The helpers below all operate on the current value of reader.

	// refreshReader replaces reader with a fresh one from idx.
	refreshReader := func() {
		t.Helper()
		reader, err = idx.Reader()
		if err != nil {
			t.Fatal(err)
		}
	}
	// closeReader closes the current reader.
	closeReader := func() {
		t.Helper()
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}
	// checkDocCount compares the reader's count with expectedCount.
	checkDocCount := func() {
		t.Helper()
		docCount, err := reader.DocCount()
		if err != nil {
			t.Error(err)
		}
		if docCount != expectedCount {
			t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
		}
	}
	// checkInternalID asserts whether id resolves to an internal ID.
	checkInternalID := func(id string, wantPresent bool) {
		t.Helper()
		iid, err := reader.InternalID(id)
		if err != nil || (iid != nil) != wantPresent {
			t.Errorf("unexpected on doc id %s (want present: %t, got %v, err: %v)", id, wantPresent, iid, err)
		}
	}
	// checkStored asserts whether a stored document can be loaded for id.
	checkStored := func(id string, wantPresent bool) {
		t.Helper()
		storedDoc, err := reader.Document(id)
		if err != nil {
			t.Error(err)
		}
		switch {
		case wantPresent && storedDoc == nil:
			t.Errorf("expected stored doc for #%s, got nil", id)
		case !wantPresent && storedDoc != nil:
			t.Errorf("expected nil for deleted stored doc #%s, got %v", id, storedDoc)
		}
	}

	// empty index
	checkDocCount()
	closeReader()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc2 := document.NewDocument("2")
	doc2.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc2)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	// both docs present, a third one unknown
	refreshReader()
	checkDocCount()
	checkInternalID("1", true)
	checkInternalID("2", true)
	checkInternalID("3", false)
	closeReader()

	err = idx.Delete("1")
	if err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	expectedCount--

	refreshReader()
	checkDocCount()
	checkStored("1", false)
	checkStored("2", true)
	closeReader()

	// now close it
	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	idx, err = NewScorch(Name, cfg, analysisQueue) // reopen
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to reopen
		t.Fatalf("error reopening index: %v", err)
	}

	// the deletion of doc 1 must have survived the reopen
	refreshReader()
	checkDocCount()
	checkStored("1", false)
	checkStored("2", true)
	checkInternalID("1", false)
	checkInternalID("2", true)
	closeReader()

	err = idx.Delete("2")
	if err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	expectedCount--

	refreshReader()
	checkDocCount()
	checkStored("1", false)
	checkStored("2", false)
	closeReader()
}
// TestIndexInsertThenUpdate writes document "1" three times with different
// content and checks that the index still counts exactly one document.
func TestIndexInsertThenUpdate(t *testing.T) {
	cfg := CreateConfig("TestIndexInsertThenUpdate")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}

	var expectedCount uint64
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	// this update should overwrite one term, and introduce one new one
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test fail"), testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// now do another update that should remove one of the terms
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("fail")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexInsertMultiple inserts three documents with distinct IDs, one
// Update call each, and checks that the document count equals three.
func TestIndexInsertMultiple(t *testing.T) {
	cfg := CreateConfig("TestIndexInsertMultiple")
	if err := InitTest(cfg); err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := DestroyTest(cfg); err != nil {
			t.Log(err)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	for _, id := range []string{"1", "2", "3"} {
		doc := document.NewDocument(id)
		doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
		if err = idx.Update(doc); err != nil {
			t.Errorf("Error updating index: %v", err)
		}
		expectedCount++
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	got, err := reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if got != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, got)
	}
	if err = reader.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestIndexInsertWithStore indexes a document with one stored text field and
// checks that loading the stored document yields exactly that field with its
// original content, and no "_id" field.
func TestIndexInsertWithStore(t *testing.T) {
	cfg := CreateConfig("TestIndexInsertWithStore")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	reader, err = idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err = reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// the reader is used and closed below; stop instead of
		// dereferencing a reader that could not be obtained
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	storedDocInt, err := indexReader.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	storedDoc, ok := storedDocInt.(*document.Document)
	if !ok {
		t.Fatalf("expected *document.Document, got %T", storedDocInt)
	}

	if len(storedDoc.Fields) != 1 {
		t.Errorf("expected 1 stored field, got %d", len(storedDoc.Fields))
	}
	for _, field := range storedDoc.Fields {
		if field.Name() == "name" {
			textField, ok := field.(*document.TextField)
			if !ok {
				t.Errorf("expected text field")
				// the value cannot be inspected on a field of another type
				continue
			}
			if string(textField.Value()) != "test" {
				t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
			}
		} else if field.Name() == "_id" {
			t.Errorf("not expecting _id field")
		}
	}
}
// TestIndexInternalCRUD exercises the internal key/value API: the key is
// absent at first, readable after SetInternal and absent again after
// DeleteInternal. At every stage the reader's snapshot is expected to hold
// no segments.
func TestIndexInternalCRUD(t *testing.T) {
	cfg := CreateConfig("TestIndexInternalCRUD")
	if err := InitTest(cfg); err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := DestroyTest(cfg); err != nil {
			t.Log(err)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	key := []byte("key")

	// inspect opens a reader, checks that its snapshot has no segments, looks
	// up the internal key, hands the value to verify and closes the reader.
	inspect := func(verify func(val []byte)) {
		r, err := idx.Reader()
		if err != nil {
			t.Error(err)
		}
		if len(r.(*IndexSnapshot).segment) != 0 {
			t.Errorf("expected 0 segments")
		}
		val, err := r.GetInternal(key)
		if err != nil {
			t.Error(err)
		}
		verify(val)
		if err = r.Close(); err != nil {
			t.Fatal(err)
		}
	}
	expectAbsent := func(val []byte) {
		if val != nil {
			t.Errorf("expected nil, got %s", val)
		}
	}

	// get something that doesn't exist yet
	inspect(expectAbsent)

	// set
	if err = idx.SetInternal(key, []byte("abc")); err != nil {
		t.Error(err)
	}

	// get
	inspect(func(val []byte) {
		if string(val) != "abc" {
			t.Errorf("expected %s, got '%s'", "abc", val)
		}
	})

	// delete
	if err = idx.DeleteInternal(key); err != nil {
		t.Error(err)
	}

	// get again
	inspect(expectAbsent)
}
// TestIndexBatch seeds the index with documents "1" and "2", then applies one
// batch that inserts "3", updates "2" and deletes "1". Afterwards the
// document count must be unchanged and the set of external IDs must be
// exactly {"2", "3"}.
func TestIndexBatch(t *testing.T) {
	cfg := CreateConfig("TestIndexBatch")
	if err := InitTest(cfg); err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := DestroyTest(cfg); err != nil {
			t.Log(err)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64

	// first create 2 docs the old fashioned way
	seeds := []struct{ id, name string }{
		{"1", "test"},
		{"2", "test2"},
	}
	for _, seed := range seeds {
		doc := document.NewDocument(seed.id)
		doc.AddField(document.NewTextField("name", []uint64{}, []byte(seed.name)))
		if err = idx.Update(doc); err != nil {
			t.Errorf("Error updating index: %v", err)
		}
		expectedCount++
	}

	// now create a batch which does 3 things
	// insert new doc
	// update existing doc
	// delete existing doc
	// net document count change 0
	batch := index.NewBatch()
	inserted := document.NewDocument("3")
	inserted.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
	batch.Update(inserted)
	updated := document.NewDocument("2")
	updated.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated")))
	batch.Update(updated)
	batch.Delete("1")

	if err = idx.Batch(batch); err != nil {
		t.Error(err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		t.Error(err)
	}
	defer func() {
		if err := indexReader.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	if numSegments := len(indexReader.(*IndexSnapshot).segment); numSegments <= 0 {
		t.Errorf("expected some segments, got: %d", numSegments)
	}

	docCount, err := indexReader.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}

	docIDReader, err := indexReader.DocIDReaderAll()
	if err != nil {
		t.Error(err)
	}
	// drain the reader until it is exhausted or reports an error
	var docIds []index.IndexInternalID
	docID, err := docIDReader.Next()
	for ; docID != nil && err == nil; docID, err = docIDReader.Next() {
		docIds = append(docIds, docID)
	}
	if err != nil {
		t.Error(err)
	}

	// convert back to external doc ids
	externalDocIds := make(map[string]struct{}, len(docIds))
	for _, internalID := range docIds {
		externalID, err := indexReader.ExternalID(internalID)
		if err != nil {
			t.Fatal(err)
		}
		externalDocIds[externalID] = struct{}{}
	}

	expectedDocIds := map[string]struct{}{
		"2": {},
		"3": {},
	}
	if !reflect.DeepEqual(externalDocIds, expectedDocIds) {
		t.Errorf("expected ids: %v, got ids: %v", expectedDocIds, externalDocIds)
	}
}
// TestIndexBatchWithCallbacks checks that the persisted callback registered
// on a batch is invoked after the batch has been submitted, and that it is
// invoked without an error.
func TestIndexBatchWithCallbacks(t *testing.T) {
	cfg := CreateConfig("TestIndexBatchWithCallbacks")
	// start from a clean directory, like the other tests in this file, so a
	// leftover index from an aborted run cannot influence this one
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		cerr := idx.Close()
		if cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// Check that callback function works
	var wg sync.WaitGroup
	wg.Add(1)

	// written by the callback before wg.Done, read only after wg.Wait
	var persistErr error

	batch := index.NewBatch()
	doc := document.NewDocument("3")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
	batch.Update(doc)
	batch.SetPersistedCallback(func(e error) {
		persistErr = e
		wg.Done()
	})

	err = idx.Batch(batch)
	if err != nil {
		t.Error(err)
	}

	// the test will time out here if the callback doesn't fire
	wg.Wait()
	if persistErr != nil {
		t.Errorf("persisted callback reported an error: %v", persistErr)
	}
}
func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) {
cfg := CreateConfig("TestIndexInsertUpdateDeleteWithMultipleTypesStored")
err := InitTest(cfg)
if err != nil {
t.Fatal(err)
}
defer func() {
err := DestroyTest(cfg)
if err != nil {
t.Log(err)
}
}()
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewScorch(Name, cfg, analysisQueue)
if err != nil {
t.Fatal(err)
}
err = idx.Open()
if err != nil {
t.Fatalf("error opening index: %v", err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
}
}()
var expectedCount uint64
reader, err := idx.Reader()
if err != nil {
t.Fatal(err)
}
docCount, err := reader.DocCount()
if err != nil {
t.Error(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
doc := document.NewDocument("1")
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, index.IndexField|index.StoreField))
df, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, index.IndexField|index.StoreField)
if err != nil {
t.Error(err)
}
doc.AddField(df)
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
expectedCount++
reader, err = idx.Reader()
if err != nil {
t.Fatal(err)
}
docCount, err = reader.DocCount()
if err != nil {
t.Error(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
indexReader, err := idx.Reader()
if err != nil {
t.Error(err)
}
storedDocInt, err := indexReader.Document("1")
if err != nil {
t.Error(err)
}
storedDoc := storedDocInt.(*document.Document)
err = indexReader.Close()
if err != nil {
t.Error(err)
}
if len(storedDoc.Fields) != 3 {
t.Errorf("expected 3 stored field, got %d", len(storedDoc.Fields))
}
for _, field := range storedDoc.Fields {
if field.Name() == "name" {
textField, ok := field.(*document.TextField)
if !ok {
t.Errorf("expected text field")
}
if string(textField.Value()) != "test" {
t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
}
} else if field.Name() == "age" {
numField, ok := field.(*document.NumericField)
if !ok {
t.Errorf("expected numeric field")
}
numFieldNumer, err := numField.Number()
if err != nil {
t.Error(err)
} else {
if numFieldNumer != 35.99 {
t.Errorf("expected numeric value 35.99, got %f", numFieldNumer)
}
}
} else if field.Name() == "unixEpoch" {
dateField, ok := field.(*document.DateTimeField)
if !ok {
t.Errorf("expected date field")
}
dateFieldDate, _, err := dateField.DateTime()
if err != nil {
t.Error(err)
} else {
if dateFieldDate != time.Unix(0, 0).UTC() {
t.Errorf("expected date value unix epoch, got %v", dateFieldDate)
}
}
} else if field.Name() == "_id" {
t.Errorf("not expecting _id field")
}
}
// now update the document, but omit one of the fields
doc = document.NewDocument("1")
doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testup"), index.IndexField|index.StoreField))
doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 36.99, index.IndexField|index.StoreField))
err = idx.Update(doc)
if err != nil {
t.Errorf("Error updating index: %v", err)
}
indexReader2, err := idx.Reader()
if err != nil {
t.Error(err)
}
// expected doc count shouldn't have changed
docCount, err = indexReader2.DocCount()
if err != nil {
t.Fatal(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
// should only get 2 fields back now though
storedDocInt, err = indexReader2.Document("1")
if err != nil {
t.Error(err)
}
storedDoc = storedDocInt.(*document.Document)
err = indexReader2.Close()
if err != nil {
t.Error(err)
}
if len(storedDoc.Fields) != 2 {
t.Errorf("expected 2 stored field, got %d", len(storedDoc.Fields))
}
for _, field := range storedDoc.Fields {
if field.Name() == "name" {
textField, ok := field.(*document.TextField)
if !ok {
t.Errorf("expected text field")
}
if string(textField.Value()) != "testup" {
t.Errorf("expected field content 'testup', got '%s'", string(textField.Value()))
}
} else if field.Name() == "age" {
numField, ok := field.(*document.NumericField)
if !ok {
t.Errorf("expected numeric field")
}
numFieldNumer, err := numField.Number()
if err != nil {
t.Error(err)
} else {
if numFieldNumer != 36.99 {
t.Errorf("expected numeric value 36.99, got %f", numFieldNumer)
}
}
} else if field.Name() == "_id" {
t.Errorf("not expecting _id field")
}
}
// now delete the document
err = idx.Delete("1")
if err != nil {
t.Errorf("Error deleting entry from index: %v", err)
}
expectedCount--
// expected doc count shouldn't have changed
reader, err = idx.Reader()
if err != nil {
t.Fatal(err)
}
docCount, err = reader.DocCount()
if err != nil {
t.Error(err)
}
if docCount != expectedCount {
t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
}
err = reader.Close()
if err != nil {
t.Fatal(err)
}
}
// TestIndexInsertFields indexes one document holding a text, a numeric and a
// date-time field and verifies that the index reader's Fields() reports
// exactly those field names together with "_id".
func TestIndexInsertFields(t *testing.T) {
	cfg := CreateConfig("TestIndexInsertFields")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
	doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, index.IndexField|index.StoreField))
	dateField, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, index.IndexField|index.StoreField)
	if err != nil {
		// Without a valid date field the document under test cannot be
		// built, so stop instead of adding an unusable field.
		t.Fatal(err)
	}
	doc.AddField(dateField)
	err = idx.Update(doc)
	if err != nil {
		t.Fatalf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the Fields call
		// below must not run against a reader that failed to open.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	fields, err := indexReader.Fields()
	if err != nil {
		t.Fatal(err)
	}

	// Compare as sets so the order in which Fields returns names is ignored.
	fieldsMap := map[string]struct{}{}
	for _, field := range fields {
		fieldsMap[field] = struct{}{}
	}
	expectedFieldsMap := map[string]struct{}{
		"_id":       {},
		"name":      {},
		"age":       {},
		"unixEpoch": {},
	}
	if !reflect.DeepEqual(fieldsMap, expectedFieldsMap) {
		t.Errorf("expected fields: %v, got %v", expectedFieldsMap, fieldsMap)
	}
}
// TestIndexUpdateComposites indexes a document that includes a composite
// "_all" field, updates the same document ID with new values, and verifies
// that the stored document read back afterwards carries the updated values
// and does not expose an "_id" field.
func TestIndexUpdateComposites(t *testing.T) {
	cfg := CreateConfig("TestIndexUpdateComposites")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), index.IndexField|index.StoreField))
	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, index.IndexField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// now lets update it
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testupdated"), index.IndexField|index.StoreField))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("misterupdated"), index.IndexField|index.StoreField))
	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, index.IndexField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// Nothing below can run without a reader.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// make sure new values are in index
	storedDocInt, err := indexReader.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	storedDoc, ok := storedDocInt.(*document.Document)
	if !ok {
		t.Fatalf("expected *document.Document, got %T", storedDocInt)
	}
	if len(storedDoc.Fields) != 2 {
		t.Errorf("expected 2 stored field, got %d", len(storedDoc.Fields))
	}
	for _, field := range storedDoc.Fields {
		switch field.Name() {
		case "name":
			textField, ok := field.(*document.TextField)
			if !ok {
				t.Errorf("expected text field")
				// textField is nil here; skip the value check.
				continue
			}
			if string(textField.Value()) != "testupdated" {
				t.Errorf("expected field content 'testupdated', got '%s'", string(textField.Value()))
			}
		case "title":
			textField, ok := field.(*document.TextField)
			if !ok {
				t.Errorf("expected text field")
				continue
			}
			if string(textField.Value()) != "misterupdated" {
				t.Errorf("expected field content 'misterupdated', got '%s'", string(textField.Value()))
			}
		case "_id":
			t.Errorf("not expecting _id field")
		}
	}
}
// TestIndexTermReaderCompositeFields indexes a document whose "name" and
// "title" fields feed a composite "_all" field, then iterates the postings of
// the term "mister" in "_all" and checks that every hit resolves to the
// external document ID "1".
func TestIndexTermReaderCompositeFields(t *testing.T) {
	cfg := CreateConfig("TestIndexTermReaderCompositeFields")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField|index.IncludeTermVectors))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), index.IndexField|index.StoreField|index.IncludeTermVectors))
	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, index.IndexField|index.IncludeTermVectors))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// Nothing below can run without a reader.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	termFieldReader, err := indexReader.TermFieldReader(context.TODO(), []byte("mister"), "_all", true, true, true)
	if err != nil {
		t.Fatal(err)
	}
	// Registered after the index reader's Close, so it runs first.
	defer func() {
		err := termFieldReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	tfd, err := termFieldReader.Next(nil)
	for tfd != nil && err == nil {
		// Use a distinct name for this error: redeclaring err here would
		// hide the iteration error from the loop condition and the check
		// that follows the loop.
		externalID, idErr := indexReader.ExternalID(tfd.ID)
		if idErr != nil {
			t.Fatal(idErr)
		}
		if externalID != "1" {
			t.Errorf("expected to find document id 1")
		}
		tfd, err = termFieldReader.Next(nil)
	}
	if err != nil {
		t.Error(err)
	}
}
// TestIndexDocValueReader indexes a single document with "name" and "title"
// text fields and verifies that visiting its doc values through a
// DocValueReader yields exactly one term per field: "test" and "mister".
func TestIndexDocValueReader(t *testing.T) {
	cfg := CreateConfig("TestIndexDocumentVisitFieldTerms")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// An index that failed to open cannot serve the rest of the test.
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField|index.IncludeTermVectors))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), index.IndexField|index.StoreField|index.IncludeTermVectors))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// The deferred Close and every call below need a usable reader.
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	internalID, err := indexReader.InternalID("1")
	if err != nil {
		t.Fatal(err)
	}

	dvr, err := indexReader.DocValueReader([]string{"name", "title"})
	if err != nil {
		t.Fatal(err)
	}

	// Collect every visited term under its field name.
	actualFieldTerms := make(fieldTerms)
	err = dvr.VisitDocValues(internalID, func(field string, term []byte) {
		actualFieldTerms[field] = append(actualFieldTerms[field], string(term))
	})
	if err != nil {
		t.Error(err)
	}

	expectedFieldTerms := fieldTerms{
		"name":  []string{"test"},
		"title": []string{"mister"},
	}
	if !reflect.DeepEqual(actualFieldTerms, expectedFieldTerms) {
		t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, actualFieldTerms)
	}
}
// TestDocValueReaderConcurrent indexes 1000 documents in a single batch and
// then has 10 goroutines concurrently visit the doc values of document "1"
// for a randomly chosen field, failing if any of them reports an error.
func TestDocValueReaderConcurrent(t *testing.T) {
	cfg := CreateConfig("TestFieldTermsConcurrent")

	// setting path to empty string disables persistence/merging
	// which ensures we have in-memory segments
	// which is important for this test, to trigger the right code
	// path, where fields exist, but have NOT been uninverted by
	// the Segment impl (in memory segments are still SegmentBase)
	cfg["path"] = ""

	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Fatal(err)
		}
	}()

	mp := mapping.NewIndexMapping()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		cerr := idx.Close()
		if cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// create a single batch (leading to 1 in-memory segment)
	// have one field named "name" and 100 others named f0-f99
	batch := index.NewBatch()
	for i := 0; i < 1000; i++ {
		data := map[string]string{
			"name": fmt.Sprintf("doc-%d", i),
		}
		for j := 0; j < 100; j++ {
			data[fmt.Sprintf("f%d", j)] = fmt.Sprintf("v%d", i)
		}
		doc := document.NewDocument(fmt.Sprintf("%d", i))
		err = mp.MapDocument(doc, data)
		if err != nil {
			t.Errorf("error mapping doc: %v", err)
		}
		batch.Update(doc)
	}

	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	// now have 10 goroutines try to visit field values for doc 1
	// in a random field
	var wg sync.WaitGroup
	for j := 0; j < 10; j++ {
		wg.Add(1)
		go func() {
			// One deferred Done covers every exit path below.
			defer wg.Done()

			// Only t.Errorf is used in here: the Fatal family must be
			// called from the goroutine running the test function.
			r, err := idx.Reader()
			if err != nil {
				t.Errorf("error getting reader: %v", err)
				return
			}
			// Close every reader this goroutine opened.
			defer func() {
				if cerr := r.Close(); cerr != nil {
					t.Errorf("error closing reader: %v", cerr)
				}
			}()

			docNumber, err := r.InternalID("1")
			if err != nil {
				t.Errorf("error getting internal ID: %v", err)
				return
			}
			dvr, err := r.DocValueReader([]string{fmt.Sprintf("f%d", rand.Intn(100))})
			if err != nil {
				t.Errorf("error getting doc value reader: %v", err)
				return
			}
			err = dvr.VisitDocValues(docNumber, func(field string, term []byte) {})
			if err != nil {
				t.Errorf("error visiting doc values: %v", err)
			}
		}()
	}

	wg.Wait()
}
// TestConcurrentUpdate issues 100 concurrent updates of document "1", each
// carrying a single differently named stored field, and then verifies that
// the stored document read back holds no more than 2 fields.
func TestConcurrentUpdate(t *testing.T) {
	cfg := CreateConfig("TestConcurrentUpdate")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// do some concurrent updates
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func(i int) {
			defer wg.Done()
			doc := document.NewDocument("1")
			doc.AddField(document.NewTextFieldWithIndexingOptions(strconv.Itoa(i), []uint64{}, []byte(strconv.Itoa(i)), index.StoreField))
			err := idx.Update(doc)
			if err != nil {
				t.Errorf("Error updating index: %v", err)
			}
		}(i)
	}
	wg.Wait()

	// now load the stored document and see what we get
	r, err := idx.Reader()
	if err != nil {
		// t.Fatal, not log.Fatal: the latter exits the whole test binary
		// and skips the deferred index close and directory cleanup.
		t.Fatal(err)
	}
	defer func() {
		err := r.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	docInt, err := r.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	doc, ok := docInt.(*document.Document)
	if !ok {
		t.Fatalf("expected *document.Document, got %T", docInt)
	}

	if len(doc.Fields) > 2 {
		t.Errorf("expected no more than 2 fields, found %d", len(doc.Fields))
	}
}
// TestLargeField checks that a document whose only text field carries at
// least 4096 bytes of content can be indexed without error.
func TestLargeField(t *testing.T) {
	config := CreateConfig("TestLargeField")
	if initErr := InitTest(config); initErr != nil {
		t.Fatal(initErr)
	}
	defer func() {
		if destroyErr := DestroyTest(config); destroyErr != nil {
			t.Log(destroyErr)
		}
	}()

	scorchIdx, err := NewScorch(Name, config, index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = scorchIdx.Open(); err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := scorchIdx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Repeat the sample article until the payload reaches 4096 bytes.
	var payload []byte
	for {
		if len(payload) >= 4096 {
			break
		}
		payload = append(payload, bleveWikiArticle1K...)
	}

	largeDoc := document.NewDocument("large")
	largeDoc.AddField(document.NewTextFieldWithIndexingOptions("desc", nil, payload, index.IndexField|index.StoreField))
	if err = scorchIdx.Update(largeDoc); err != nil {
		t.Fatal(err)
	}
}
// bleveWikiArticle1K is sample article text (the opening of a Wikipedia entry
// on boiling liquid expanding vapor explosions). TestLargeField appends it
// repeatedly to build a large field value.
var bleveWikiArticle1K = []byte(`Boiling liquid expanding vapor explosion
From Wikipedia, the free encyclopedia
See also: Boiler explosion and Steam explosion
Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.
This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
Contents [hide]
1 Mechanism
1.1 Water example
1.2 BLEVEs without chemical reactions
2 Fires
3 Incidents
4 Safety measures
5 See also
6 References
7 External links
Mechanism[edit]
This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`)
// TestIndexDocValueReaderWithMultipleDocs indexes three documents one after
// another and, after each update, verifies the doc values visited for the
// newly indexed document. After the third update it also re-checks document
// "1" through the same reader, to confirm earlier documents remain readable
// once documents with different field names ("name3", "title3") were added.
func TestIndexDocValueReaderWithMultipleDocs(t *testing.T) {
	cfg := CreateConfig("TestIndexDocumentVisitFieldTermsWithMultipleDocs")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	options := index.IndexField | index.StoreField | index.IncludeTermVectors

	// indexDoc stores a document with two text fields under the given ID.
	indexDoc := func(id, nameField, nameValue, titleField, titleValue string) {
		t.Helper()
		doc := document.NewDocument(id)
		doc.AddField(document.NewTextFieldWithIndexingOptions(nameField, []uint64{}, []byte(nameValue), options))
		doc.AddField(document.NewTextFieldWithIndexingOptions(titleField, []uint64{}, []byte(titleValue), options))
		if err := idx.Update(doc); err != nil {
			t.Errorf("Error updating index: %v", err)
		}
	}

	// docCheck describes the doc values expected for one document.
	type docCheck struct {
		docID    string
		fields   []string
		expected fieldTerms
	}

	// verify opens a fresh reader, runs every check against that one reader
	// and closes it again, including when a check aborts the test.
	verify := func(checks ...docCheck) {
		t.Helper()
		indexReader, err := idx.Reader()
		if err != nil {
			// The checks below cannot run without a reader.
			t.Fatal(err)
		}
		defer func() {
			if err := indexReader.Close(); err != nil {
				t.Fatal(err)
			}
		}()

		for _, check := range checks {
			docNumber, err := indexReader.InternalID(check.docID)
			if err != nil {
				t.Fatal(err)
			}
			dvr, err := indexReader.DocValueReader(check.fields)
			if err != nil {
				t.Fatal(err)
			}
			actualFieldTerms := make(fieldTerms)
			err = dvr.VisitDocValues(docNumber, func(field string, term []byte) {
				actualFieldTerms[field] = append(actualFieldTerms[field], string(term))
			})
			if err != nil {
				t.Error(err)
			}
			if !reflect.DeepEqual(actualFieldTerms, check.expected) {
				t.Errorf("expected field terms: %#v, got: %#v", check.expected, actualFieldTerms)
			}
		}
	}

	doc1Check := docCheck{
		docID:  "1",
		fields: []string{"name", "title"},
		expected: fieldTerms{
			"name":  []string{"test"},
			"title": []string{"mister"},
		},
	}

	indexDoc("1", "name", "test", "title", "mister")
	verify(doc1Check)

	indexDoc("2", "name", "test2", "title", "mister2")
	verify(docCheck{
		docID:  "2",
		fields: []string{"name", "title"},
		expected: fieldTerms{
			"name":  []string{"test2"},
			"title": []string{"mister2"},
		},
	})

	indexDoc("3", "name3", "test3", "title3", "mister3")
	verify(
		docCheck{
			docID:  "3",
			fields: []string{"name3", "title3"},
			expected: fieldTerms{
				"name3":  []string{"test3"},
				"title3": []string{"mister3"},
			},
		},
		// document 1 must still be readable through the same reader
		doc1Check,
	)
}
// TestIndexDocValueReaderWithMultipleFieldOptions indexes a document that
// mixes text fields created with default options and text fields created with
// explicit indexing options, then verifies that the doc values visited for
// "name", "designation" and "dept" match what was indexed.
func TestIndexDocValueReaderWithMultipleFieldOptions(t *testing.T) {
	cfg := CreateConfig("TestIndexDocumentVisitFieldTermsWithMultipleFieldOptions")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// mix of field options, this exercises the run time/ on the fly un inverting of
	// doc values for custom options enabled field like designation, dept.
	options := index.IndexField | index.StoreField | index.IncludeTermVectors
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))    // default doc value persisted
	doc.AddField(document.NewTextField("title", []uint64{}, []byte("mister"))) // default doc value persisted
	doc.AddField(document.NewTextFieldWithIndexingOptions("designation", []uint64{}, []byte("engineer"), options))
	doc.AddField(document.NewTextFieldWithIndexingOptions("dept", []uint64{}, []byte("bleve"), options))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// Every call below needs a usable reader.
		t.Fatal(err)
	}
	// Deferred so the reader is closed even if a check below aborts the
	// test; registered after the index Close, so it runs before it.
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	docNumber, err := indexReader.InternalID("1")
	if err != nil {
		t.Fatal(err)
	}
	dvr, err := indexReader.DocValueReader([]string{"name", "designation", "dept"})
	if err != nil {
		t.Fatal(err)
	}

	actualFieldTerms := make(fieldTerms)
	err = dvr.VisitDocValues(docNumber, func(field string, term []byte) {
		actualFieldTerms[field] = append(actualFieldTerms[field], string(term))
	})
	if err != nil {
		t.Error(err)
	}

	// "title" is indexed but not requested, so it must not show up here.
	expectedFieldTerms := fieldTerms{
		"name":        []string{"test"},
		"designation": []string{"engineer"},
		"dept":        []string{"bleve"},
	}
	if !reflect.DeepEqual(actualFieldTerms, expectedFieldTerms) {
		t.Errorf("expected field terms: %#v, got: %#v", expectedFieldTerms, actualFieldTerms)
	}
}
// TestAllFieldWithDifferentTermVectorsEnabled maps and indexes a document
// whose two fields are both included in the composite "all" field, one with
// term vectors disabled and one with them enabled, and expects mapping and
// indexing to succeed.
//
// Based on https://github.com/blevesearch/bleve/issues/895 from xeizmendi
func TestAllFieldWithDifferentTermVectorsEnabled(t *testing.T) {
	config := CreateConfig("TestAllFieldWithDifferentTermVectorsEnabled")
	if initErr := InitTest(config); initErr != nil {
		t.Fatal(initErr)
	}
	defer func() {
		if destroyErr := DestroyTest(config); destroyErr != nil {
			t.Log(destroyErr)
		}
	}()

	// Keyword-analyzed field without term vectors.
	kwField := mapping.NewTextFieldMapping()
	kwField.Analyzer = keyword.Name
	kwField.IncludeTermVectors = false
	kwField.IncludeInAll = true

	// Standard-analyzed field with term vectors.
	txtField := mapping.NewTextFieldMapping()
	txtField.Analyzer = standard.Name
	txtField.IncludeTermVectors = true
	txtField.IncludeInAll = true

	staticMapping := mapping.NewDocumentStaticMapping()
	staticMapping.AddFieldMappingsAt("keyword", kwField)
	staticMapping.AddFieldMappingsAt("text", txtField)

	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = staticMapping

	scorchIdx, err := NewScorch("storeName", config, index.NewAnalysisQueue(1))
	if err != nil {
		log.Fatalln(err)
	}
	if err = scorchIdx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := scorchIdx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	mappedDoc := document.NewDocument("1")
	source := map[string]string{
		"keyword": "something",
		"text":    "A sentence that includes something within.",
	}
	if err = indexMapping.MapDocument(mappedDoc, source); err != nil {
		t.Errorf("error mapping doc: %v", err)
	}
	if err = scorchIdx.Update(mappedDoc); err != nil {
		t.Errorf("Error updating index: %v", err)
	}
}
// TestForceVersion checks that forcing the "zap" segment type at versions 11
// and 12 loads a segment plugin reporting that version, and that forcing
// version 10 makes NewScorch return an error.
func TestForceVersion(t *testing.T) {
	config := map[string]interface{}{}
	config["forceSegmentType"] = "zap"
	queue := index.NewAnalysisQueue(1)

	// build creates a scorch instance with the given forced segment version
	// and aborts the test if construction fails.
	build := func(version int) *Scorch {
		config["forceSegmentVersion"] = version
		created, buildErr := NewScorch(Name, config, queue)
		if buildErr != nil {
			t.Fatalf("error opening a supported version: %v", buildErr)
		}
		return created.(*Scorch)
	}

	if loaded := build(11); loaded.segPlugin.Version() != 11 {
		t.Fatalf("wrong segment wrapper version loaded, expected %d got %d", 11, loaded.segPlugin.Version())
	}

	if loaded := build(12); loaded.segPlugin.Version() != 12 {
		t.Fatalf("wrong segment wrapper version loaded, expected %d got %d", 12, loaded.segPlugin.Version())
	}

	// Version 10 is expected to be rejected.
	config["forceSegmentVersion"] = 10
	if _, unsupportedErr := NewScorch(Name, config, queue); unsupportedErr == nil {
		t.Fatalf("expected an error opening an unsupported version, got nil")
	}
}
// TestIndexForceMerge writes 10 batches of two documents each, expects them
// to sit as 10 file segments at the root, and then calls ForceMerge until a
// single root file segment remains. Finally it verifies that a merge plan
// with an oversized MaxSegmentSize is rejected with
// mergeplan.ErrMaxSegmentSizeTooLarge.
func TestIndexForceMerge(t *testing.T) {
	cfg := CreateConfig("TestIndexForceMerge")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	tmp := struct {
		MaxSegmentsPerTier   int   `json:"maxSegmentsPerTier"`
		SegmentsPerMergeTask int   `json:"segmentsPerMergeTask"`
		FloorSegmentSize     int64 `json:"floorSegmentSize"`
	}{
		int(1),
		int(1),
		int64(2),
	}
	cfg["scorchMergePlanOptions"] = &tmp

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	// Deferred so the index is closed even when a check below aborts the
	// test; it runs before the directory cleanup registered above.
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	batch := index.NewBatch()
	for i := 0; i < 10; i++ {
		doc := document.NewDocument(fmt.Sprintf("doc1-%d", i))
		doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text1-%d", i))))
		batch.Update(doc)
		doc = document.NewDocument(fmt.Sprintf("doc2-%d", i))
		doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text2-%d", i))))
		batch.Update(doc)
		err = idx.Batch(batch)
		if err != nil {
			t.Error(err)
		}
		batch.Reset()
		expectedCount += 2
	}

	// verify doc count
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := indexReader.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = indexReader.Close()
	if err != nil {
		t.Fatal(err)
	}

	si, ok := idx.(*Scorch)
	if !ok {
		// si is nil here and is dereferenced right below.
		t.Fatal("expects a scorch index")
	}

	nfs := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot)
	if nfs != 10 {
		t.Errorf("expected 10 root file segments, got: %d", nfs)
	}

	ctx := context.Background()
	for atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) != 1 {
		err := si.ForceMerge(ctx, &mergeplan.MergePlanOptions{
			MaxSegmentsPerTier:   1,
			MaxSegmentSize:       10000,
			SegmentsPerMergeTask: 10,
			FloorSegmentSize:     10000,
		})
		if err != nil {
			// Abort: retrying a merge that keeps failing could leave this
			// loop spinning without ever reaching a single segment.
			t.Fatalf("ForceMerge failed, err: %v", err)
		}
	}

	// verify the final root segment count
	if atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) != 1 {
		t.Errorf("expected a single root file segments, got: %d",
			atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot))
	}

	// verify with an invalid merge plan
	err = si.ForceMerge(ctx, &mergeplan.MergePlanOptions{
		MaxSegmentsPerTier:   1,
		MaxSegmentSize:       1 << 33,
		SegmentsPerMergeTask: 10,
		FloorSegmentSize:     10000,
	})
	if err != mergeplan.ErrMaxSegmentSizeTooLarge {
		t.Errorf("ForceMerge expected to fail with ErrMaxSegmentSizeTooLarge")
	}
}
// TestCancelIndexForceMerge writes 20 batches of two documents each, starts a
// ForceMerge, and cancels its context as soon as the number of root file
// segments drops below the initial count. It then expects ForceMerge to
// return without error and the root to still hold more than one file segment.
func TestCancelIndexForceMerge(t *testing.T) {
	cfg := CreateConfig("TestCancelIndexForceMerge")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	tmp := struct {
		MaxSegmentsPerTier   int   `json:"maxSegmentsPerTier"`
		SegmentsPerMergeTask int   `json:"segmentsPerMergeTask"`
		FloorSegmentSize     int64 `json:"floorSegmentSize"`
	}{
		int(1),
		int(1),
		int64(2),
	}
	cfg["scorchMergePlanOptions"] = &tmp

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	// Deferred so the index is closed even when a check below aborts the
	// test; it runs before the directory cleanup registered above.
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	batch := index.NewBatch()
	for i := 0; i < 20; i++ {
		doc := document.NewDocument(fmt.Sprintf("doc1-%d", i))
		doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text1-%d", i))))
		batch.Update(doc)
		doc = document.NewDocument(fmt.Sprintf("doc2-%d", i))
		doc.AddField(document.NewTextField("name", []uint64{}, []byte(fmt.Sprintf("text2-%d", i))))
		batch.Update(doc)
		err = idx.Batch(batch)
		if err != nil {
			t.Error(err)
		}
		batch.Reset()
		expectedCount += 2
	}

	// verify doc count
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := indexReader.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = indexReader.Close()
	if err != nil {
		t.Fatal(err)
	}

	si, ok := idx.(*Scorch)
	if !ok {
		t.Fatal("expects a scorch index")
	}

	// no merge operations are expected as per the original merge policy.
	nfsr := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot)
	if nfsr != 20 {
		t.Errorf("expected 20 root file segments, got: %d", nfsr)
	}

	ctx, cancel := context.WithCancel(context.Background())
	// Always release the context. This also stops the watcher goroutine
	// below if the segment count never drops.
	defer cancel()

	// cancel the force merge operation once the root has some new merge
	// introductions. ie if the root has lesser file segments than earlier.
	go func() {
		for {
			nval := atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot)
			if nval < nfsr {
				cancel()
				return
			}
			// Poll every 5ms, but leave as soon as the context is done so
			// the goroutine cannot outlive the test.
			select {
			case <-ctx.Done():
				return
			case <-time.After(time.Millisecond * 5):
			}
		}
	}()

	err = si.ForceMerge(ctx, &mergeplan.MergePlanOptions{
		MaxSegmentsPerTier:   1,
		MaxSegmentSize:       10000,
		SegmentsPerMergeTask: 5,
		FloorSegmentSize:     10000,
	})
	if err != nil {
		t.Errorf("ForceMerge failed, err: %v", err)
	}

	// verify the final root file segment count or forceMerge completion
	if atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot) == 1 {
		t.Errorf("expected many files at root, but got: %d segments",
			atomic.LoadUint64(&si.stats.TotFileSegmentsAtRoot))
	}
}
// TestIndexSeekBackwardsStats indexes two documents sharing the term "cat",
// advances a term field reader past both, asks it to Advance back to the
// first document, closes it, and then checks that the scorch stats report as
// many term searchers finished as were started.
func TestIndexSeekBackwardsStats(t *testing.T) {
	cfg := CreateConfig("TestIndexOpenReopen")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// An index that failed to open cannot serve the rest of the test.
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// insert a doc
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("cat")))
	err = idx.Update(doc)
	if err != nil {
		t.Fatalf("error updating index: %v", err)
	}

	// insert another doc
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("cat")))
	err = idx.Update(doc)
	if err != nil {
		t.Fatalf("error updating index: %v", err)
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatalf("error getting index reader: %v", err)
	}
	defer func() {
		// Report a failed close instead of silently dropping it.
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	tfr, err := reader.TermFieldReader(context.TODO(), []byte("cat"), "name", false, false, false)
	if err != nil {
		t.Fatalf("error getting term field readyer for name/cat: %v", err)
	}

	tfdFirst, err := tfr.Next(nil)
	if err != nil {
		t.Fatalf("error getting first tfd: %v", err)
	}
	if tfdFirst == nil {
		// Guard the tfdFirst.ID access below.
		t.Fatal("expected a first tfd for name/cat, got nil")
	}

	_, err = tfr.Next(nil)
	if err != nil {
		t.Fatalf("error getting second tfd: %v", err)
	}

	// seek backwards to the first
	_, err = tfr.Advance(tfdFirst.ID, nil)
	if err != nil {
		t.Fatalf("error adancing backwards: %v", err)
	}

	err = tfr.Close()
	if err != nil {
		t.Fatalf("error closing term field reader: %v", err)
	}

	si, ok := idx.(*Scorch)
	if !ok {
		t.Fatal("expects a scorch index")
	}
	// Load the counters atomically, the same way the other tests in this
	// file read scorch stats.
	started := atomic.LoadUint64(&si.stats.TotTermSearchersStarted)
	finished := atomic.LoadUint64(&si.stats.TotTermSearchersFinished)
	if started != finished {
		t.Errorf("expected term searchers started %d to equal term searchers finished %d",
			started, finished)
	}
}
// fieldTerms maps a field name to the list of terms a document uses in that
// field.
type fieldTerms map[string][]string

// FieldsNotYetCached filters fields down to the names that have no entry in
// the receiver, preserving their input order. The result is never nil.
func (f fieldTerms) FieldsNotYetCached(fields []string) []string {
	missing := make([]string, 0, len(fields))
	for i := range fields {
		name := fields[i]
		if _, cached := f[name]; cached {
			continue
		}
		missing = append(missing, name)
	}
	return missing
}

// Merge copies every entry of other into the receiver.
// Term lists are treated as complete, so they are not combined: for a field
// present in both, the list from other overwrites the receiver's list.
func (f fieldTerms) Merge(other fieldTerms) {
	for name := range other {
		f[name] = other[name]
	}
}
// TestOpenBoltTimeout opens an index, then tries to open a second index on
// the same path with "bolt_timeout" set to "100ms" while the first one is
// still open, and expects that second Open to fail.
func TestOpenBoltTimeout(t *testing.T) {
	cfg := CreateConfig("TestIndexOpenReopen")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewScorch("storeName", cfg, analysisQueue)
	if err != nil {
		// t.Fatal, not log.Fatalln: the latter exits the whole test binary
		// and skips the deferred cleanup.
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	// The first index has to stay open while the second Open is attempted.
	// Close it afterwards so nothing is left open when the directory is
	// removed.
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// new config
	cfg2 := CreateConfig("TestIndexOpenReopen")
	// copy path from original config
	cfg2["path"] = cfg["path"]
	// set timeout in this cfg
	cfg2["bolt_timeout"] = "100ms"

	idx2, err := NewScorch("storeName", cfg2, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx2.Open()
	if err == nil {
		t.Error("expected timeout error opening index again")
		// The second index opened unexpectedly; do not leak it.
		if cerr := idx2.Close(); cerr != nil {
			t.Error(cerr)
		}
	}
}
// TestReadOnlyIndex writes one document, makes every file under the index
// path read-only, and then checks that the index can still be opened with
// "read_only" set and reports a document count of 1.
func TestReadOnlyIndex(t *testing.T) {
	// https://github.com/blevesearch/bleve/issues/1623

	cfg := CreateConfig("TestReadOnlyIndex")
	err := InitTest(cfg)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := DestroyTest(cfg)
		if err != nil {
			t.Log(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	writeIdx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = writeIdx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	writeIdxClosed := false
	defer func() {
		if !writeIdxClosed {
			err := writeIdx.Close()
			if err != nil {
				t.Fatal(err)
			}
		}
	}()

	// Add a single document to the index.
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = writeIdx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	// Mark the index closed before checking the error so the deferred
	// cleanup above never attempts a second Close.
	writeIdxClosed = true
	if err := writeIdx.Close(); err != nil {
		t.Fatalf("error closing index: %v", err)
	}

	// After the index is written, change permissions on every file
	// in the index to read-only.
	var permissionsFunc func(folder string)
	permissionsFunc = func(folder string) {
		entries, err := os.ReadDir(folder)
		if err != nil {
			t.Fatal(err)
		}
		for _, entry := range entries {
			fullName := filepath.Join(folder, entry.Name())
			if entry.IsDir() {
				permissionsFunc(fullName)
				continue
			}
			if err := os.Chmod(fullName, 0o555); err != nil {
				t.Fatal(err)
			}
		}
	}
	permissionsFunc(cfg["path"].(string))

	// Now reopen the index in read-only mode and attempt to read from it.
	cfg["read_only"] = true
	readIdx, err := NewScorch(Name, cfg, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = readIdx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	// Register the Close only once the index is known to be non-nil and open.
	defer func() {
		err := readIdx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	reader, err := readIdx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	// Deferred last, so the reader is released before the index is closed.
	defer func() {
		if err := reader.Close(); err != nil {
			t.Errorf("error closing reader: %v", err)
		}
	}()

	docCount, err := reader.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != 1 {
		t.Errorf("Expected document count to be %d got %d", 1, docCount)
	}
}
// BenchmarkAggregateFieldStats measures folding 1000 fieldStats values, each
// holding one random "num_vectors" entry for the "vector" field, into a
// fresh aggregate.
func BenchmarkAggregateFieldStats(b *testing.B) {
	const numInputs = 1000

	inputs := make([]*fieldStats, 0, numInputs)
	for len(inputs) < numInputs {
		fs := newFieldStats()
		fs.Store("num_vectors", "vector", uint64(rand.Intn(1000)))
		inputs = append(inputs, fs)
	}

	// Exclude the setup above from the measurement.
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		agg := newFieldStats()
		for j := range inputs {
			agg.Aggregate(inputs[j])
		}
	}
}
// TestPersistorMergerOptions feeds JSON configurations containing
// "scorchPersisterOptions" and "scorchMergePlanOptions" to NewScorch and
// checks, per case, whether construction reports an error.
func TestPersistorMergerOptions(t *testing.T) {
	type testCase struct {
		config    string
		expectErr bool
	}
	cases := []testCase{
		{
			// valid config and no error expected
			config: `{
"scorchPersisterOptions": {
"persisterNapTimeMSec": 1110,
"memoryPressurePauseThreshold" : 333
}
}`,
			expectErr: false,
		},
		{
			// valid json with invalid config values
			// and error expected
			config: `{
"scorchPersisterOptions": {
"persisterNapTimeMSec": "1110",
"memoryPressurePauseThreshold" : [333]
}
}`,
			expectErr: true,
		},
		{
			// valid json with invalid config values
			// and error expected
			config: `{
"scorchPersisterOptions": {
"persisterNapTimeMSec": 1110.2,
"memoryPressurePauseThreshold" : 333
}
}`,
			expectErr: true,
		},
		{
			// invalid setting for scorchMergePlanOptions
			config: `{
"scorchPersisterOptions": {
"persisterNapTimeMSec": 1110,
"memoryPressurePauseThreshold" : 333
},
"scorchMergePlanOptions": [{
"maxSegmentSize": 10000,
"maxSegmentsPerTier": 10,
"segmentsPerMergeTask": 10
}]
}`,
			expectErr: true,
		},
		{
			// valid setting
			config: `{
"scorchPersisterOptions": {
"persisterNapTimeMSec": 1110,
"memoryPressurePauseThreshold" : 333
},
"scorchMergePlanOptions": {
"maxSegmentSize": 10000,
"maxSegmentsPerTier": 10,
"segmentsPerMergeTask": 10
}
}`,
			expectErr: false,
		},
		{
			// non-integer maxSegmentSize, error expected
			config: `{
"scorchPersisterOptions": {
"persisterNapTimeMSec": 1110,
"memoryPressurePauseThreshold" : 333
},
"scorchMergePlanOptions": {
"maxSegmentSize": 5.6,
"maxSegmentsPerTier": 10,
"segmentsPerMergeTask": 10
}
}`,
			expectErr: true,
		},
	}

	for n, tc := range cases {
		cfg := map[string]interface{}{}
		if err := json.Unmarshal([]byte(tc.config), &cfg); err != nil {
			t.Fatalf("test %d: error unmarshalling config: %v", n, err)
		}

		queue := index.NewAnalysisQueue(1)
		_, err := NewScorch(Name, cfg, queue)
		switch {
		case tc.expectErr && err == nil:
			t.Errorf("test %d: expected error, got nil", n)
		case !tc.expectErr && err != nil:
			t.Errorf("test %d: unexpected error: %v", n, err)
		}
	}
}
================================================
FILE: index/scorch/segment_plugin.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"fmt"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/geo"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
zapv11 "github.com/blevesearch/zapx/v11"
zapv12 "github.com/blevesearch/zapx/v12"
zapv13 "github.com/blevesearch/zapx/v13"
zapv14 "github.com/blevesearch/zapx/v14"
zapv15 "github.com/blevesearch/zapx/v15"
zapv16 "github.com/blevesearch/zapx/v16"
zapv17 "github.com/blevesearch/zapx/v17"
)
// SegmentPlugin represents the essential functions required by a package to plug in
// its segment implementation.
//
// Implementations are registered with RegisterSegmentPlugin and looked up by
// their (Type, Version) pair.
type SegmentPlugin interface {
// Type is the name for this segment plugin
Type() string
// Version is a numeric value identifying a specific version of this type.
// When incompatible changes are made to a particular type of plugin, the
// version must be incremented.
Version() uint32
// New takes a set of Documents and turns them into a new Segment
New(results []index.Document) (segment.Segment, uint64, error)
// NewUsing is the variant of New that additionally receives a config map.
// NOTE(review): the meaning of the config keys and of the uint64 result is
// defined by the plugin implementation - confirm against the plugin.
NewUsing(results []index.Document, config map[string]interface{}) (segment.Segment, uint64, error)
// Open attempts to open the file at the specified path and
// return the corresponding Segment
Open(path string) (segment.Segment, error)
// OpenUsing is the variant of Open that additionally receives a config map.
OpenUsing(path string, config map[string]interface{}) (segment.Segment, error)
// Merge takes a set of Segments, and creates a new segment on disk at
// the specified path.
// Drops is a set of bitmaps (one for each segment) indicating which
// documents can be dropped from the segments during the merge.
// If the closeCh channel is closed, Merge will cease doing work at
// the next opportunity, and return an error (closed).
// StatsReporter can optionally be provided, in which case progress
// made during the merge is reported while operation continues.
// Returns:
// A slice of new document numbers (one for each input segment),
// this allows the caller to know a particular document's new
// document number in the newly merged segment.
// The number of bytes written to the new segment file.
// An error, if any occurred.
Merge(segments []segment.Segment, drops []*roaring.Bitmap, path string,
closeCh chan struct{}, s segment.StatsReporter) (
[][]uint64, uint64, error)
// MergeUsing is the variant of Merge that additionally receives a config map;
// its parameters and results otherwise mirror Merge.
MergeUsing(segments []segment.Segment, drops []*roaring.Bitmap, path string,
closeCh chan struct{}, s segment.StatsReporter, config map[string]interface{}) (
[][]uint64, uint64, error)
}
var (
	// supportedSegmentPlugins holds every registered plugin, keyed first by
	// plugin type and then by plugin version.
	supportedSegmentPlugins map[string]map[uint32]SegmentPlugin

	// defaultSegmentPlugin is the plugin most recently registered with
	// makeDefault set to true.
	defaultSegmentPlugin SegmentPlugin
)
// init resets the plugin registry and registers the bundled zap plugins,
// v17 down to v11, with v17 as the default.
func init() {
	ResetSegmentPlugins()

	RegisterSegmentPlugin(&zapv17.ZapPlugin{}, true)

	// Remaining versions are registered as non-default, in descending order.
	olderVersions := []SegmentPlugin{
		&zapv16.ZapPlugin{},
		&zapv15.ZapPlugin{},
		&zapv14.ZapPlugin{},
		&zapv13.ZapPlugin{},
		&zapv12.ZapPlugin{},
		&zapv11.ZapPlugin{},
	}
	for _, plugin := range olderVersions {
		RegisterSegmentPlugin(plugin, false)
	}
}
func ResetSegmentPlugins() {
supportedSegmentPlugins = map[string]map[uint32]SegmentPlugin{}
}
// RegisterSegmentPlugin stores plugin in the registry under its type and
// version, replacing any plugin already registered for that pair. When
// makeDefault is true the plugin also becomes the default segment plugin.
func RegisterSegmentPlugin(plugin SegmentPlugin, makeDefault bool) {
	typ := plugin.Type()
	versions, known := supportedSegmentPlugins[typ]
	if !known {
		versions = map[uint32]SegmentPlugin{}
		supportedSegmentPlugins[typ] = versions
	}
	versions[plugin.Version()] = plugin

	if makeDefault {
		defaultSegmentPlugin = plugin
	}
}
// SupportedSegmentTypes lists the plugin type names present in the registry.
// The order follows map iteration and is therefore unspecified; the result is
// nil when nothing is registered.
func SupportedSegmentTypes() (rv []string) {
	for typ := range supportedSegmentPlugins {
		rv = append(rv, typ)
	}
	return rv
}
// SupportedSegmentTypeVersions lists the versions registered for the plugin
// type typ, in unspecified order. The result is nil when the type is unknown.
func SupportedSegmentTypeVersions(typ string) (rv []uint32) {
	versions := supportedSegmentPlugins[typ]
	for version := range versions {
		rv = append(rv, version)
	}
	return rv
}
// chooseSegmentPlugin looks up the registered plugin for the given type and
// version. It returns an error naming the supported types when the type is
// unknown, or the supported versions when only the version is unknown.
func chooseSegmentPlugin(forcedSegmentType string,
	forcedSegmentVersion uint32) (SegmentPlugin, error) {
	versions, typeKnown := supportedSegmentPlugins[forcedSegmentType]
	if !typeKnown {
		return nil, fmt.Errorf("unsupported segment type: %s, supported: %v",
			forcedSegmentType, SupportedSegmentTypes())
	}

	plugin, versionKnown := versions[forcedSegmentVersion]
	if !versionKnown {
		return nil, fmt.Errorf(
			"unsupported version %d for segment type: %s, supported: %v",
			forcedSegmentVersion, forcedSegmentType,
			SupportedSegmentTypeVersions(forcedSegmentType))
	}
	return plugin, nil
}
// loadSegmentPlugin resolves the plugin for the requested type and version
// and stores it on s. On lookup failure s is left unchanged and the error is
// returned.
func (s *Scorch) loadSegmentPlugin(forcedSegmentType string,
	forcedSegmentVersion uint32) error {
	plugin, err := chooseSegmentPlugin(forcedSegmentType, forcedSegmentVersion)
	if err == nil {
		s.segPlugin = plugin
	}
	return err
}
// loadSpatialAnalyzerPlugin stores the spatial analyzer plugin obtained from
// the geo package for typ on s, and returns an error when the lookup yields
// nil. The field is assigned even in the error case.
func (s *Scorch) loadSpatialAnalyzerPlugin(typ string) error {
	if s.spatialPlugin = geo.GetSpatialAnalyzerPlugin(typ); s.spatialPlugin == nil {
		return fmt.Errorf("unsupported spatial plugin type: %s", typ)
	}
	return nil
}
================================================
FILE: index/scorch/snapshot_index.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"container/heap"
"context"
"fmt"
"os"
"path/filepath"
"reflect"
"sort"
"sync"
"sync/atomic"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
"github.com/blevesearch/vellum"
lev "github.com/blevesearch/vellum/levenshtein"
bolt "go.etcd.io/bbolt"
)
// Reusable, thread-safe Levenshtein automaton builders: lb1 is built for an
// edit distance of 1 and lb2 for an edit distance of 2. Both are created
// once in init.
var lb1, lb2 *lev.LevenshteinAutomatonBuilder
// asynchSegmentResult carries the outcome of per-segment work done in a
// goroutine back to the collecting caller over a channel. Only the fields
// relevant to the particular operation are populated.
type asynchSegmentResult struct {
// term dictionary of the segment (set for random-lookup field dicts)
dict segment.TermDictionary
// iterator over the segment's dictionary (set for iterating field dicts)
dictItr segment.DictionaryIterator
// position of the segment within the snapshot's segment slice
index int
// document numbers selected from the segment
docs *roaring.Bitmap
// NOTE(review): not populated anywhere in this part of the file; presumably
// used by thesaurus lookups elsewhere - confirm.
thesItr segment.ThesaurusIterator
// error encountered while processing the segment, if any
err error
}
// reflectStaticSizeIndexSnapshot is the size in bytes of an IndexSnapshot
// value as reported by reflect; it is computed once in init.
var reflectStaticSizeIndexSnapshot int

// init records the static size of IndexSnapshot and creates the shared
// Levenshtein automaton builders, panicking if either builder cannot be made.
func init() {
	var sample interface{} = IndexSnapshot{}
	reflectStaticSizeIndexSnapshot = int(reflect.TypeOf(sample).Size())

	var err error
	if lb1, err = lev.NewLevenshteinAutomatonBuilder(1, true); err != nil {
		panic(fmt.Errorf("levenshtein automaton ed1 builder err: %v", err))
	}
	if lb2, err = lev.NewLevenshteinAutomatonBuilder(2, true); err != nil {
		panic(fmt.Errorf("levenshtein automaton ed2 builder err: %v", err))
	}
}
// IndexSnapshot is a reference-counted view over a set of segment snapshots
// together with the index's internal key/value data.
type IndexSnapshot struct {
// owning index; may be nil (DecRef checks for that)
parent *Scorch
// segment snapshots that make up this index snapshot
segment []*SegmentSnapshot
// per-segment starting global document number, used to translate a global
// document number into (segment index, local document number)
offsets []uint64
// internal key/value pairs, keyed by string(key)
internal map[string][]byte
// epoch reported to the parent once the last reference is released
epoch uint64
// accumulated size, maintained by updateSize
size uint64
// NOTE(review): not referenced in this part of the file; presumably names
// the code path that created the snapshot - confirm.
creator string
m sync.Mutex // Protects the fields that follow.
refs int64
m2 sync.Mutex // Protects the fields that follow.
fieldTFRs map[string][]*IndexSnapshotTermFieldReader // keyed by field, recycled TFR's
m3 sync.RWMutex // bm25 metrics specific - not to interfere with TFR creation
// cache of per-field cardinality, filled lazily by FieldCardinality
fieldCardinality map[string]int
// Stores information about zapx fields that have been
// fully deleted (indicated by UpdateFieldInfo.Deleted) or
// partially deleted index, store or docvalues (indicated by
// UpdateFieldInfo.Index or .Store or .DocValues).
// Used to short circuit queries trying to read stale data
updatedFields map[string]*index.UpdateFieldInfo
}
// Segments returns the snapshot's segment slice itself (not a copy).
func (is *IndexSnapshot) Segments() []*SegmentSnapshot {
	return is.segment
}
// Internal returns the snapshot's internal key/value map itself (not a copy).
func (is *IndexSnapshot) Internal() map[string][]byte {
	return is.internal
}
// AddRef increments the snapshot's reference count under its mutex.
func (is *IndexSnapshot) AddRef() {
	is.m.Lock()
	defer is.m.Unlock()
	is.refs++
}
// DecRef decrements the snapshot's reference count. When the count reaches
// zero it releases one reference on every non-nil segment and, if the
// snapshot has a parent, asynchronously reports its epoch as eligible for
// removal. The first error returned by a segment is reported; later ones are
// dropped.
func (is *IndexSnapshot) DecRef() (err error) {
	is.m.Lock()
	defer is.m.Unlock()

	is.refs--
	if is.refs != 0 {
		return nil
	}

	for _, seg := range is.segment {
		if seg == nil {
			continue
		}
		// Always release the segment, but keep only the first failure.
		if decErr := seg.segment.DecRef(); err == nil {
			err = decErr
		}
	}
	if is.parent != nil {
		go is.parent.AddEligibleForRemoval(is.epoch)
	}
	return err
}
// Close releases the caller's reference on the snapshot; it is equivalent to
// DecRef.
func (is *IndexSnapshot) Close() error {
	err := is.DecRef()
	return err
}
// Size returns the size accumulated by updateSize, converted to int.
func (is *IndexSnapshot) Size() int {
	size := int(is.size)
	return size
}
// updateSize adds the static size of the snapshot struct plus the size of
// every segment to the snapshot's size. It accumulates onto the current
// value rather than resetting it.
func (is *IndexSnapshot) updateSize() {
	added := uint64(reflectStaticSizeIndexSnapshot)
	for idx := range is.segment {
		added += uint64(is.segment[idx].Size())
	}
	is.size += added
}
// newIndexSnapshotFieldDict builds a field dictionary over every segment of
// the snapshot.
//
// Each segment's dictionary for field is fetched in its own goroutine. With
// randomLookup set, the dictionaries themselves are kept as cursors and
// makeItr is not called (it may be nil). Otherwise makeItr turns each
// dictionary into an iterator, the first entry of each iterator is read, and
// the resulting cursors are arranged as a heap.
//
// All per-segment results are always drained before returning. The first
// error encountered is returned, and results that carry an error are never
// dereferenced.
func (is *IndexSnapshot) newIndexSnapshotFieldDict(field string,
	makeItr func(i segment.TermDictionary) segment.DictionaryIterator,
	randomLookup bool,
) (*IndexSnapshotFieldDict, error) {
	// Buffered for one result per segment so no goroutine blocks on send.
	results := make(chan *asynchSegmentResult, len(is.segment))
	var totalBytesRead uint64
	var fieldCardinality int64
	for _, s := range is.segment {
		go func(s *SegmentSnapshot) {
			dict, err := s.segment.Dictionary(field)
			if err != nil {
				results <- &asynchSegmentResult{err: err}
				return
			}
			if dictStats, ok := dict.(segment.DiskStatsReporter); ok {
				atomic.AddUint64(&totalBytesRead, dictStats.BytesRead())
			}
			atomic.AddInt64(&fieldCardinality, int64(dict.Cardinality()))
			if randomLookup {
				results <- &asynchSegmentResult{dict: dict}
			} else {
				results <- &asynchSegmentResult{dictItr: makeItr(dict)}
			}
		}(s)
	}

	var err error
	rv := &IndexSnapshotFieldDict{
		snapshot: is,
		cursors:  make([]*segmentDictCursor, 0, len(is.segment)),
	}
	for count := 0; count < len(is.segment); count++ {
		asr := <-results
		if asr.err != nil {
			// A failed result carries neither a dictionary nor an iterator,
			// so it must be skipped even when an earlier error was already
			// recorded.
			if err == nil {
				err = asr.err
			}
			continue
		}
		if randomLookup {
			rv.cursors = append(rv.cursors, &segmentDictCursor{
				dict: asr.dict,
			})
			continue
		}
		next, err2 := asr.dictItr.Next()
		if err2 != nil && err == nil {
			err = err2
		}
		if next != nil {
			rv.cursors = append(rv.cursors, &segmentDictCursor{
				itr:  asr.dictItr,
				curr: *next,
			})
		}
	}
	rv.cardinality = int(fieldCardinality)
	rv.bytesRead = totalBytesRead
	// after ensuring we've read all items on channel
	if err != nil {
		return nil, err
	}

	if !randomLookup {
		// prepare heap
		heap.Init(rv)
	}

	return rv, nil
}
// FieldCardinality returns the cardinality of field, computing it from the
// field dictionary on first use and caching it on the snapshot afterwards.
func (is *IndexSnapshot) FieldCardinality(field string) (rv int, err error) {
	// Fast path: cached value under the read lock.
	is.m3.RLock()
	cached, found := is.fieldCardinality[field]
	is.m3.RUnlock()
	if found {
		return cached, nil
	}

	is.m3.Lock()
	defer is.m3.Unlock()
	if is.fieldCardinality == nil {
		is.fieldCardinality = make(map[string]int)
	}
	// Look again under the write lock to avoid building a redundant field
	// dictionary when another caller filled the cache in the meantime.
	if cached, found = is.fieldCardinality[field]; found {
		return cached, nil
	}

	fd, err := is.FieldDict(field)
	if err != nil {
		return 0, err
	}
	rv = fd.Cardinality()
	is.fieldCardinality[field] = rv
	return rv, nil
}
// FieldDict returns a field dictionary over all terms of field, using an
// unconstrained iterator (no automaton, no start key, no end key) on every
// segment.
func (is *IndexSnapshot) FieldDict(field string) (index.FieldDict, error) {
	allTerms := func(dict segment.TermDictionary) segment.DictionaryIterator {
		return dict.AutomatonIterator(nil, nil, nil)
	}
	return is.newIndexSnapshotFieldDict(field, allTerms, false)
}
// calculateExclusiveEndFromInclusiveEnd produces the key that immediately
// follows inclusiveEnd under memcmp-style ordering, suitable for use as the
// end key of a traditional [inclusive, exclusive) start/end range.
//
// An empty input is returned as is. Otherwise a new slice is returned and
// the input is left unmodified.
func calculateExclusiveEndFromInclusiveEnd(inclusiveEnd []byte) []byte {
	n := len(inclusiveEnd)
	if n == 0 {
		return inclusiveEnd
	}

	if inclusiveEnd[n-1] == 0xff {
		// The last byte cannot be incremented, so the next key is the same
		// bytes followed by a single zero byte.
		next := make([]byte, n+1)
		copy(next, inclusiveEnd)
		return next
	}

	// The last byte can simply be incremented by one.
	next := make([]byte, n)
	copy(next, inclusiveEnd)
	next[n-1]++
	return next
}
// FieldDictRange returns a field dictionary over the terms of field that lie
// between startTerm and endTerm, where endTerm is treated as inclusive.
//
// The inclusive end is converted to an exclusive end key once, up front, and
// shared (read-only) by the iterators of all segments.
func (is *IndexSnapshot) FieldDictRange(field string, startTerm []byte,
	endTerm []byte,
) (index.FieldDict, error) {
	endTermExclusive := calculateExclusiveEndFromInclusiveEnd(endTerm)
	return is.newIndexSnapshotFieldDict(field, func(is segment.TermDictionary) segment.DictionaryIterator {
		return is.AutomatonIterator(nil, startTerm, endTermExclusive)
	}, false)
}
// calculateExclusiveEndFromPrefix produces the first key that
// does not have the same prefix as the input bytes, suitable
// to use as the end key in a traditional [inclusive, exclusive)
// start/end range.
//
// It returns nil when the input is empty or consists solely of 0xff bytes,
// because no end key exists for such a prefix. The input is never modified.
func calculateExclusiveEndFromPrefix(in []byte) []byte {
	if len(in) == 0 {
		return nil
	}
	rv := make([]byte, len(in))
	copy(rv, in)
	for i := len(rv) - 1; i >= 0; i-- {
		rv[i]++
		if rv[i] != 0 {
			// Didn't overflow, so stop. Bytes after position i overflowed
			// to zero and must be dropped: keeping them would yield a key
			// that sorts after shorter keys which do not share the prefix
			// (e.g. prefix {0x01,0xff} must end at {0x02}, not {0x02,0x00}).
			return rv[:i+1]
		}
	}
	// all bytes were 0xff, so return nil
	// as there is no end key for this prefix
	return nil
}
// FieldDictPrefix returns a field dictionary over the terms of field that
// start with termPrefix. The range runs from termPrefix up to, but not
// including, the end key derived from the prefix.
func (is *IndexSnapshot) FieldDictPrefix(field string,
	termPrefix []byte,
) (index.FieldDict, error) {
	prefixEnd := calculateExclusiveEndFromPrefix(termPrefix)
	withinPrefix := func(dict segment.TermDictionary) segment.DictionaryIterator {
		return dict.AutomatonIterator(nil, termPrefix, prefixEnd)
	}
	return is.newIndexSnapshotFieldDict(field, withinPrefix, false)
}
// FieldDictRegexp returns a field dictionary over the terms of field that
// match termRegex. It is FieldDictRegexpAutomaton with the automaton result
// discarded.
func (is *IndexSnapshot) FieldDictRegexp(field string,
	termRegex string,
) (index.FieldDict, error) {
	dict, _, err := is.FieldDictRegexpAutomaton(field, termRegex)
	return dict, err
}
// FieldDictRegexpAutomaton returns a field dictionary over the terms of
// field that match termRegex, together with the automaton compiled from the
// expression.
func (is *IndexSnapshot) FieldDictRegexpAutomaton(field string,
	termRegex string,
) (index.FieldDict, index.RegexAutomaton, error) {
	dict, automaton, err := is.fieldDictRegexp(field, termRegex)
	return dict, automaton, err
}
// fieldDictRegexp parses termRegex into an automaton plus start and end
// keys, and builds a field dictionary whose per-segment iterators are
// constrained by all three. Both results are nil when parsing or dictionary
// construction fails.
func (is *IndexSnapshot) fieldDictRegexp(field string,
	termRegex string,
) (index.FieldDict, index.RegexAutomaton, error) {
	// TODO: potential optimization where the literal prefix represents the,
	// entire regexp, allowing us to use PrefixIterator(prefixTerm)?

	automaton, rangeBeg, rangeEnd, err := parseRegexp(termRegex)
	if err != nil {
		return nil, nil, err
	}

	matching := func(dict segment.TermDictionary) segment.DictionaryIterator {
		return dict.AutomatonIterator(automaton, rangeBeg, rangeEnd)
	}
	fd, err := is.newIndexSnapshotFieldDict(field, matching, false)
	if err != nil {
		return nil, nil, err
	}
	return fd, automaton, nil
}
// getLevAutomaton builds a Levenshtein DFA for term using the shared builder
// that matches fuzziness. Only fuzziness values 1 and 2 are supported; any
// other value yields an error.
func (is *IndexSnapshot) getLevAutomaton(term string,
	fuzziness uint8,
) (vellum.Automaton, error) {
	if fuzziness == 1 {
		return lb1.BuildDfa(term, fuzziness)
	}
	if fuzziness == 2 {
		return lb2.BuildDfa(term, fuzziness)
	}
	return nil, fmt.Errorf("fuzziness exceeds the max limit")
}
// FieldDictFuzzy returns a field dictionary over the terms of field that are
// within fuzziness edits of term, optionally restricted to those starting
// with prefix. It is FieldDictFuzzyAutomaton with the automaton result
// discarded.
func (is *IndexSnapshot) FieldDictFuzzy(field string,
	term string, fuzziness int, prefix string,
) (index.FieldDict, error) {
	dict, _, err := is.FieldDictFuzzyAutomaton(field, term, fuzziness, prefix)
	return dict, err
}
// FieldDictFuzzyAutomaton returns the fuzzy field dictionary for term
// together with the fuzzy automaton used to build it.
func (is *IndexSnapshot) FieldDictFuzzyAutomaton(field string,
	term string, fuzziness int, prefix string,
) (index.FieldDict, index.FuzzyAutomaton, error) {
	dict, automaton, err := is.fieldDictFuzzy(field, term, fuzziness, prefix)
	return dict, automaton, err
}
// fieldDictFuzzy builds a Levenshtein automaton for term and a field
// dictionary whose per-segment iterators are constrained by it. When prefix
// is non-empty the iterators are additionally limited to the key range
// covered by that prefix.
//
// The returned FuzzyAutomaton is nil when the built automaton does not
// implement vellum.FuzzyAutomaton. An error is returned when fuzziness is
// negative, unsupported, or when the dictionary cannot be built.
func (is *IndexSnapshot) fieldDictFuzzy(field string,
	term string, fuzziness int, prefix string,
) (index.FieldDict, index.FuzzyAutomaton, error) {
	// Reject values that do not fit in a uint8 before converting, so that
	// e.g. 257 is not silently truncated to 1 and accepted.
	const maxUint8 = 1<<8 - 1
	if fuzziness < 0 {
		return nil, nil, fmt.Errorf("fuzziness must not be negative")
	}
	if fuzziness > maxUint8 {
		return nil, nil, fmt.Errorf("fuzziness exceeds the max limit")
	}

	a, err := is.getLevAutomaton(term, uint8(fuzziness))
	if err != nil {
		return nil, nil, err
	}
	var fa index.FuzzyAutomaton
	if vfa, ok := a.(vellum.FuzzyAutomaton); ok {
		fa = vfa
	}
	var prefixBeg, prefixEnd []byte
	if prefix != "" {
		prefixBeg = []byte(prefix)
		prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg)
	}
	fd, err := is.newIndexSnapshotFieldDict(field, func(is segment.TermDictionary) segment.DictionaryIterator {
		return is.AutomatonIterator(a, prefixBeg, prefixEnd)
	}, false)
	if err != nil {
		return nil, nil, err
	}
	return fd, fa, nil
}
// FieldDictContains returns a field dictionary for field built in
// random-lookup mode, i.e. holding the per-segment dictionaries rather than
// iterators over them.
func (is *IndexSnapshot) FieldDictContains(field string) (index.FieldDictContains, error) {
	const randomLookup = true
	return is.newIndexSnapshotFieldDict(field, nil, randomLookup)
}
// DocIDReaderAll returns a reader over the live document numbers of every
// segment. The per-segment bitmaps are gathered concurrently.
func (is *IndexSnapshot) DocIDReaderAll() (index.DocIDReader, error) {
	results := make(chan *asynchSegmentResult, len(is.segment))
	for pos, seg := range is.segment {
		go func(pos int, seg *SegmentSnapshot) {
			live := seg.DocNumbersLive()
			results <- &asynchSegmentResult{index: pos, docs: live}
		}(pos, seg)
	}
	return is.newDocIDReader(results)
}
// DocIDReaderOnly returns a reader restricted to the documents identified by
// ids. Each segment resolves the ids to document numbers concurrently; a
// failure in any segment is reported through the result channel.
func (is *IndexSnapshot) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
	results := make(chan *asynchSegmentResult, len(is.segment))
	for pos, seg := range is.segment {
		go func(pos int, seg *SegmentSnapshot) {
			docs, err := seg.DocNumbers(ids)
			if err != nil {
				results <- &asynchSegmentResult{err: err}
				return
			}
			results <- &asynchSegmentResult{index: pos, docs: docs}
		}(pos, seg)
	}
	return is.newDocIDReader(results)
}
// newDocIDReader collects exactly one result per segment from results and
// assembles a document ID reader with one iterator per segment, placed at
// the segment's index. It always drains all results and returns the first
// error encountered, if any.
func (is *IndexSnapshot) newDocIDReader(results chan *asynchSegmentResult) (index.DocIDReader, error) {
	reader := &IndexSnapshotDocIDReader{
		snapshot:  is,
		iterators: make([]roaring.IntIterable, len(is.segment)),
	}

	var firstErr error
	for pending := len(is.segment); pending > 0; pending-- {
		asr := <-results
		switch {
		case asr.err != nil:
			// only the first error is kept
			if firstErr == nil {
				firstErr = asr.err
			}
		case firstErr == nil:
			reader.iterators[asr.index] = asr.docs.Iterator()
		}
	}

	if firstErr != nil {
		return nil, firstErr
	}
	return reader, nil
}
// Fields returns the distinct field names found across all segments, in
// unspecified order. The error result is always nil.
func (is *IndexSnapshot) Fields() ([]string, error) {
	// FIXME not making this concurrent for now as it's not used in hot path
	// of any searches at the moment (just a debug aid)
	seen := make(map[string]struct{})
	for _, seg := range is.segment {
		for _, name := range seg.Fields() {
			seen[name] = struct{}{}
		}
	}

	names := make([]string, 0, len(seen))
	for name := range seen {
		names = append(names, name)
	}
	return names, nil
}
// GetInternal returns the internal value stored under key, or nil when the
// key is absent. The error result is always nil.
func (is *IndexSnapshot) GetInternal(key []byte) ([]byte, error) {
	val := is.internal[string(key)]
	return val, nil
}
// DocCount returns the sum of CountRoot over all segments. The error result
// is always nil.
func (is *IndexSnapshot) DocCount() (uint64, error) {
	total := uint64(0)
	for idx := range is.segment {
		total += is.segment[idx].CountRoot()
	}
	return total, nil
}
// Document loads the stored fields of the document with the given external
// id. It returns (nil, nil) when no such document exists.
//
// The document is located through a term lookup on the "_id" field; its
// stored fields are then visited on the owning segment and converted to
// typed document fields. Fields recorded in updatedFields as deleted, or as
// having had their stored data removed, are skipped.
func (is *IndexSnapshot) Document(id string) (rv index.Document, err error) {
	// FIXME could be done more efficiently directly, but reusing for simplicity
	tfr, err := is.TermFieldReader(context.TODO(), []byte(id), "_id", false, false, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		// surface a close failure only if nothing else went wrong
		closeErr := tfr.Close()
		if err == nil && closeErr != nil {
			err = closeErr
		}
	}()

	match, err := tfr.Next(nil)
	if err != nil {
		return nil, err
	}
	if match == nil {
		// no such doc exists
		return nil, nil
	}

	globalDocNum, err := match.ID.Value()
	if err != nil {
		return nil, err
	}
	segIdx, localDocNum := is.segmentIndexAndLocalDocNumFromGlobal(globalDocNum)

	result := document.NewDocument(id)
	collectField := func(name string, typ byte, val []byte, pos []uint64) bool {
		if name == "_id" {
			return true
		}

		// track uncompressed stored fields bytes as part of IO stats.
		// However, ideally we'd need to track the compressed on-disk value
		// Keeping that TODO for now until we have a cleaner way.
		result.StoredFieldsSize += uint64(len(val))

		// Skip fields that have been completely deleted or had their
		// store data deleted
		if info, updated := is.updatedFields[name]; updated && (info.Deleted || info.Store) {
			return true
		}

		// copy value, array positions to preserve them beyond the scope of this callback
		valueCopy := append([]byte(nil), val...)
		posCopy := append([]uint64(nil), pos...)

		switch typ {
		case 't':
			result.AddField(document.NewTextField(name, posCopy, valueCopy))
		case 'n':
			result.AddField(document.NewNumericFieldFromBytes(name, posCopy, valueCopy))
		case 'i':
			result.AddField(document.NewIPFieldFromBytes(name, posCopy, valueCopy))
		case 'd':
			result.AddField(document.NewDateTimeFieldFromBytes(name, posCopy, valueCopy))
		case 'b':
			result.AddField(document.NewBooleanFieldFromBytes(name, posCopy, valueCopy))
		case 'g':
			result.AddField(document.NewGeoPointFieldFromBytes(name, posCopy, valueCopy))
		case 's':
			result.AddField(document.NewGeoShapeFieldFromBytes(name, posCopy, valueCopy))
		}

		return true
	}

	err = is.segment[segIdx].VisitDocument(localDocNum, collectField)
	if err != nil {
		return nil, err
	}

	return result, nil
}
// segmentIndexAndLocalDocNumFromGlobal translates a global document number
// into the index of the segment that holds the document and the document's
// number local to that segment.
//
// In a multi-segment index, each document has:
//  1. a local docnum - local to the segment
//  2. a global docnum - unique identifier across the index
func (is *IndexSnapshot) segmentIndexAndLocalDocNumFromGlobal(docNum uint64) (int, uint64) {
	// Find the first segment whose starting offset lies beyond docNum; the
	// segment just before it is the one containing the document.
	firstBeyond := sort.Search(len(is.offsets), func(pos int) bool {
		return docNum < is.offsets[pos]
	})
	segmentIndex := firstBeyond - 1
	localDocNum := docNum - is.offsets[segmentIndex]
	return segmentIndex, localDocNum
}
// ExternalID resolves an internal document ID to the document's external id
// by asking the owning segment. It returns an error when the internal ID
// cannot be decoded, the segment lookup fails, or the segment has no id for
// the document.
func (is *IndexSnapshot) ExternalID(id index.IndexInternalID) (string, error) {
	globalDocNum, err := id.Value()
	if err != nil {
		return "", err
	}

	segIdx, localDocNum := is.segmentIndexAndLocalDocNumFromGlobal(globalDocNum)
	external, err := is.segment[segIdx].DocID(localDocNum)
	switch {
	case err != nil:
		return "", err
	case external == nil:
		return "", fmt.Errorf("document number %d not found", globalDocNum)
	}
	return string(external), nil
}
// segmentIndexAndLocalDocNum decodes an internal document ID and returns the
// index of the owning segment along with the segment-local document number.
// Both numbers are zero when decoding fails.
func (is *IndexSnapshot) segmentIndexAndLocalDocNum(id index.IndexInternalID) (int, uint64, error) {
	globalDocNum, err := id.Value()
	if err != nil {
		return 0, 0, err
	}
	segIdx, local := is.segmentIndexAndLocalDocNumFromGlobal(globalDocNum)
	return segIdx, local, nil
}
// InternalID resolves an external document id to its internal ID through a
// term lookup on the "_id" field. It returns (nil, nil) when no such
// document exists.
func (is *IndexSnapshot) InternalID(id string) (rv index.IndexInternalID, err error) {
	// FIXME could be done more efficiently directly, but reusing for simplicity
	tfr, err := is.TermFieldReader(context.TODO(), []byte(id), "_id", false, false, false)
	if err != nil {
		return nil, err
	}
	defer func() {
		// surface a close failure only if nothing else went wrong
		closeErr := tfr.Close()
		if err == nil && closeErr != nil {
			err = closeErr
		}
	}()

	match, err := tfr.Next(nil)
	if err != nil {
		return nil, err
	}
	if match == nil {
		return nil, nil
	}
	return match.ID, nil
}
// TermFieldReader returns a reader over the postings of term in field across
// all segments of the snapshot.
//
// The reader is obtained from allocTermFieldReaderDicts, which hands back
// either a recycled reader for the same field or a fresh one, so every piece
// of per-use state is (re)initialized here. includeFreq, includeNorm and
// includeTermVectors are forwarded to each segment's postings iterator.
// On success the parent's TotTermSearchersStarted counter is incremented.
func (is *IndexSnapshot) TermFieldReader(ctx context.Context, term []byte, field string, includeFreq,
includeNorm, includeTermVectors bool,
) (index.TermFieldReader, error) {
rv := is.allocTermFieldReaderDicts(field)
rv.ctx = ctx
rv.term = term
rv.field = field
rv.snapshot = is
// A recycled reader keeps its postings/iterators slices so they can be
// passed back to the segment below; allocate them only for a fresh reader.
if rv.postings == nil {
rv.postings = make([]segment.PostingsList, len(is.segment))
}
if rv.iterators == nil {
rv.iterators = make([]segment.PostingsIterator, len(is.segment))
}
rv.segmentOffset = 0
rv.includeFreq = includeFreq
rv.includeNorm = includeNorm
rv.includeTermVectors = includeTermVectors
rv.currPosting = nil
rv.currID = rv.currID[:0]
// The per-segment dictionaries are loaded only when the reader does not
// already carry them (i.e. it was not recycled with dictionaries attached).
if rv.dicts == nil {
rv.dicts = make([]segment.TermDictionary, len(is.segment))
for i, s := range is.segment {
// the intention behind this compare and swap operation is
// to make sure that the accounting of the metadata is happening
// only once(which corresponds to this persisted segment's most
// recent segPlugin.Open() call), and any subsequent queries won't
// incur this cost which would essentially be a double counting.
if atomic.CompareAndSwapUint32(&s.mmaped, 1, 0) {
segBytesRead := s.segment.BytesRead()
rv.incrementBytesRead(segBytesRead)
}
var dict segment.TermDictionary
var err error
// Skip fields that have been completely deleted or had their
// index data deleted
if info, ok := is.updatedFields[field]; ok &&
(info.Index || info.Deleted) {
// the dictionary for the empty field name is requested instead
dict, err = s.segment.Dictionary("")
} else {
dict, err = s.segment.Dictionary(field)
}
if err != nil {
return nil, err
}
if dictStats, ok := dict.(segment.DiskStatsReporter); ok {
bytesRead := dictStats.BytesRead()
rv.incrementBytesRead(bytesRead)
}
rv.dicts[i] = dict
}
}
for i, s := range is.segment {
// Remember what a reused postings list / iterator had already reported
// so that only the additional bytes read by this call are accounted for.
var prevBytesReadPL uint64
if rv.postings[i] != nil {
prevBytesReadPL = rv.postings[i].BytesRead()
}
pl, err := rv.dicts[i].PostingsList(term, s.deleted, rv.postings[i])
if err != nil {
return nil, err
}
rv.postings[i] = pl
var prevBytesReadItr uint64
if rv.iterators[i] != nil {
prevBytesReadItr = rv.iterators[i].BytesRead()
}
rv.iterators[i] = pl.Iterator(includeFreq, includeNorm, includeTermVectors, rv.iterators[i])
if bytesRead := rv.postings[i].BytesRead(); prevBytesReadPL < bytesRead {
rv.incrementBytesRead(bytesRead - prevBytesReadPL)
}
if bytesRead := rv.iterators[i].BytesRead(); prevBytesReadItr < bytesRead {
rv.incrementBytesRead(bytesRead - prevBytesReadItr)
}
}
// ONLY update the bytes read value beyond this point for this TFR if scoring is enabled
rv.updateBytesRead = rv.includeFreq || rv.includeNorm || rv.includeTermVectors
atomic.AddUint64(&is.parent.stats.TotTermSearchersStarted, uint64(1))
return rv, nil
}
// allocTermFieldReaderDicts returns a term field reader for field: the most
// recently recycled one when the snapshot's per-field cache holds any,
// otherwise a fresh reader marked as recyclable.
func (is *IndexSnapshot) allocTermFieldReaderDicts(field string) (tfr *IndexSnapshotTermFieldReader) {
	is.m2.Lock()
	// Reading from a nil map yields an empty slice, so no separate nil check
	// on the cache is needed.
	if cached := is.fieldTFRs[field]; len(cached) > 0 {
		top := len(cached) - 1
		tfr = cached[top]
		// clear the slot so the cache no longer references the reader
		cached[top] = nil
		is.fieldTFRs[field] = cached[:top]
		is.m2.Unlock()
		return tfr
	}
	is.m2.Unlock()

	return &IndexSnapshotTermFieldReader{recycle: true}
}
// DefaultFieldTFRCacheThreshold limits the number of TermFieldReaders (TFRs)
// kept per field in an index snapshot's recycle cache. Without this limit,
// when recycling TFRs, it is possible that a very large number of TFRs may be
// added to the recycle cache, which could eventually lead to significant
// memory consumption.
// This threshold can be overridden by users at the library level by changing
// the exported variable, or at the index level by setting the
// "fieldTFRCacheThreshold" in the kvConfig.
// A TFR is only cached while the per-field cache holds fewer entries than the
// threshold, so the default of 0 means no TFR is ever cached.
var DefaultFieldTFRCacheThreshold int = 0 // disabled because it causes MB-64604
// getFieldTFRCacheThreshold returns the per-field TFR cache limit for this
// snapshot: the parent's "fieldTFRCacheThreshold" config value when it is
// present as a float64 or an int, and DefaultFieldTFRCacheThreshold
// otherwise.
func (is *IndexSnapshot) getFieldTFRCacheThreshold() int {
	if is.parent.config == nil {
		return DefaultFieldTFRCacheThreshold
	}
	val, exists := is.parent.config["fieldTFRCacheThreshold"]
	if !exists {
		return DefaultFieldTFRCacheThreshold
	}

	switch x := val.(type) {
	case float64:
		// JSON unmarshal-ed into a map[string]interface{} will default
		// to float64 for numbers, so float64 is checked first.
		return int(x)
	case int:
		// If library users provided an int in the config, we'll honor it.
		return x
	}
	return DefaultFieldTFRCacheThreshold
}
// recycleTermFieldReader returns tfr to the snapshot's per-field cache so a
// later TermFieldReader call for the same field can reuse it. The reader is
// dropped instead when it is not marked recyclable, when the snapshot is no
// longer the parent's current root, or when the cache for its field has
// reached the configured threshold.
func (is *IndexSnapshot) recycleTermFieldReader(tfr *IndexSnapshotTermFieldReader) {
	if !tfr.recycle {
		// Do not recycle an optimized unadorned term field reader (used for
		// ConjunctionUnadorned or DisjunctionUnadorned), during when a fresh
		// roaring.Bitmap is built by AND-ing or OR-ing individual bitmaps,
		// and we'll need to release them for GC. (See MB-40916)
		return
	}

	is.parent.rootLock.RLock()
	isCurrentRoot := is.parent.root == is
	is.parent.rootLock.RUnlock()
	if !isCurrentRoot {
		// if we're not the current root (mutations happened), don't bother recycling
		return
	}

	is.m2.Lock()
	defer is.m2.Unlock()

	if is.fieldTFRs == nil {
		is.fieldTFRs = make(map[string][]*IndexSnapshotTermFieldReader)
	}
	cached := is.fieldTFRs[tfr.field]
	if len(cached) >= is.getFieldTFRCacheThreshold() {
		return
	}
	// start the recycled reader with a clean bytes-read counter
	tfr.bytesRead = 0
	is.fieldTFRs[tfr.field] = append(cached, tfr)
}
// documentVisitFieldTermsOnSegment visits the doc values of one document on
// one segment.
//
// Fields the segment can visit directly (per VisitableDocValueFields) are
// visited through the segment; the remaining requested fields (cFields) are
// served from the segment's cachedDocs, which is prepared in a goroutine
// while the segment visit runs. Fields recorded in updatedFields as deleted,
// or as having had their doc values removed, are filtered out first.
//
// Callers may pass back the cFields and dvs returned by a previous call for
// the same segment to avoid recomputing them; both are returned again on
// success. On error both are nil.
func (is *IndexSnapshot) documentVisitFieldTermsOnSegment(
segmentIndex int, localDocNum uint64, fields []string, cFields []string,
visitor index.DocValueVisitor, dvs segment.DocVisitState) (
cFieldsOut []string, dvsOut segment.DocVisitState, err error,
) {
ss := is.segment[segmentIndex]
var vFields []string // fields that are visitable via the segment
ssv, ssvOk := ss.segment.(segment.DocValueVisitable)
if ssvOk && ssv != nil {
vFields, err = ssv.VisitableDocValueFields()
if err != nil {
return nil, nil, err
}
}
// Filter out fields that have been completely deleted or had their
// docvalues data deleted from both visitable fields and required fields
filterUpdatedFields := func(fields []string) []string {
filteredFields := make([]string, 0, len(fields))
for _, field := range fields {
if info, ok := is.updatedFields[field]; ok &&
(info.DocValues || info.Deleted) {
continue
}
filteredFields = append(filteredFields, field)
}
return filteredFields
}
if len(is.updatedFields) > 0 {
fields = filterUpdatedFields(fields)
vFields = filterUpdatedFields(vFields)
}
var errCh chan error
// cFields represents the fields that we'll need from the
// cachedDocs, and might be optionally be provided by the caller,
// if the caller happens to know we're on the same segmentIndex
// from a previous invocation
if cFields == nil {
cFields = subtractStrings(fields, vFields)
if len(cFields) > 0 && !ss.cachedDocs.hasFields(cFields) {
// Buffered with capacity 1 so the goroutine can always deliver its
// error and exit, even if this function returns early without
// receiving from the channel.
errCh = make(chan error, 1)
go func() {
err := ss.cachedDocs.prepareFields(cFields, ss)
if err != nil {
errCh <- err
}
close(errCh)
}()
}
}
if ssvOk && ssv != nil && len(vFields) > 0 {
dvs, err = ssv.VisitDocValues(localDocNum, fields, visitor, dvs)
if err != nil {
return nil, nil, err
}
}
// Wait for the cache preparation, if one was started; a receive from the
// closed channel yields nil when no error was sent.
if errCh != nil {
err = <-errCh
if err != nil {
return nil, nil, err
}
}
if len(cFields) > 0 {
ss.cachedDocs.visitDoc(localDocNum, cFields, visitor)
}
return cFields, dvs, nil
}
// DocValueReader builds a reader that visits the doc values of the given
// fields against this snapshot. The returned error is always nil.
func (is *IndexSnapshot) DocValueReader(fields []string) (
	index.DocValueReader, error,
) {
	reader := &DocValueReader{
		i:      is,
		fields: fields,
		// -1 marks that no segment has been visited yet
		currSegmentIndex: -1,
	}
	return reader, nil
}
// DocValueReader visits doc values for a fixed set of fields on an
// IndexSnapshot, carrying per-segment state between successive
// VisitDocValues calls.
type DocValueReader struct {
// snapshot the reader operates on
i *IndexSnapshot
// fields whose values are visited
fields []string
// visit state handed to, and returned by, the per-segment visit
dvs segment.DocVisitState
// index of the segment visited most recently; starts at -1
currSegmentIndex int
// cache-served fields returned by the previous visit on the current
// segment; reset to nil whenever the segment changes
currCachedFields []string
// bytes read accumulated from segments visited before the current one
totalBytesRead uint64
// bytes read reported by dvs after the most recent visit
bytesRead uint64
}
// BytesRead reports the bytes read so far: the total carried over from
// earlier segments plus the count for the current segment.
func (dvr *DocValueReader) BytesRead() uint64 {
	total := dvr.totalBytesRead
	total += dvr.bytesRead
	return total
}
// VisitDocValues invokes visitor for the reader's fields on the document
// identified by id. An id that maps past the last segment is ignored and
// nil is returned.
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
	visitor index.DocValueVisitor,
) (err error) {
	globalDocNum, err := id.Value()
	if err != nil {
		return err
	}

	segIdx, localDocNum := dvr.i.segmentIndexAndLocalDocNumFromGlobal(globalDocNum)
	if segIdx >= len(dvr.i.segment) {
		return nil
	}

	if segIdx != dvr.currSegmentIndex {
		// switching segments: bank the bytes read so far and drop the
		// cached-field list that belonged to the previous segment
		dvr.totalBytesRead += dvr.bytesRead
		dvr.bytesRead = 0
		dvr.currCachedFields = nil
		dvr.currSegmentIndex = segIdx
	}

	cachedFields, state, err := dvr.i.documentVisitFieldTermsOnSegment(
		dvr.currSegmentIndex, localDocNum, dvr.fields, dvr.currCachedFields,
		visitor, dvr.dvs)
	dvr.currCachedFields = cachedFields
	dvr.dvs = state
	if dvr.dvs != nil {
		dvr.bytesRead = dvr.dvs.BytesRead()
	}
	return err
}
// DumpAll returns a channel that carries no values; it is closed from a
// separate goroutine.
func (is *IndexSnapshot) DumpAll() chan interface{} {
	out := make(chan interface{})
	go close(out)
	return out
}
// DumpDoc returns a channel that carries no values; it is closed from a
// separate goroutine. The id argument is not used.
func (is *IndexSnapshot) DumpDoc(id string) chan interface{} {
	out := make(chan interface{})
	go close(out)
	return out
}
// DumpFields returns a channel that carries no values; it is closed from a
// separate goroutine.
func (is *IndexSnapshot) DumpFields() chan interface{} {
	out := make(chan interface{})
	go close(out)
	return out
}
// diskSegmentsPaths collects, as a set, the paths of every segment in the
// snapshot that implements segment.PersistedSegment.
func (is *IndexSnapshot) diskSegmentsPaths() map[string]struct{} {
	paths := make(map[string]struct{}, len(is.segment))
	for _, snap := range is.segment {
		persisted, isPersisted := snap.segment.(segment.PersistedSegment)
		if !isPersisted {
			continue
		}
		paths[persisted.Path()] = struct{}{}
	}
	return paths
}
// reClaimableDocsRatio returns the fraction of documents held in persisted
// segments that are no longer live: (full size - live count) / full size.
// In-memory segments are not counted; with no persisted documents the
// ratio is 0.
func (is *IndexSnapshot) reClaimableDocsRatio() float64 {
	var total, live uint64
	for _, snap := range is.segment {
		if _, persisted := snap.segment.(segment.PersistedSegment); !persisted {
			continue
		}
		total += uint64(snap.FullSize())
		live += uint64(snap.Count())
	}
	if total == 0 {
		return 0
	}
	return float64(total-live) / float64(total)
}
// subtractStrings returns the elements of a that do not occur in b,
// preserving their order (and any duplicates) from a. When b is empty, a
// itself is returned rather than a copy.
func subtractStrings(a, b []string) []string {
	if len(b) == 0 {
		return a
	}
	remaining := make([]string, 0, len(a))
	for _, candidate := range a {
		excluded := false
		for _, drop := range b {
			if candidate == drop {
				excluded = true
				break
			}
		}
		if !excluded {
			remaining = append(remaining, candidate)
		}
	}
	return remaining
}
// CopyTo writes a backup of this index snapshot into the directory d.
//
// It obtains a writer for "store/root.bolt" from d, opens that file as a
// bolt database, records the snapshot in a single write transaction via
// prepareBoltSnapshot, commits, and syncs the database.
//
// The result is a named return value so that the deferred cleanup can
// report a failure to close the bolt database when nothing else failed.
func (is *IndexSnapshot) CopyTo(d index.Directory) (err error) {
	// get the root bolt file.
	w, err := d.GetWriter(filepath.Join("store", "root.bolt"))
	if err != nil || w == nil {
		return fmt.Errorf("failed to create the root.bolt file, err: %v", err)
	}
	rootFile, ok := w.(*os.File)
	if !ok {
		// the writer is of no use to us; release it (best effort)
		w.Close()
		return fmt.Errorf("invalid root.bolt file found")
	}

	copyBolt, err := bolt.Open(rootFile.Name(), 0o600, nil)
	if err != nil {
		// the deferred cleanup is not installed yet, so release the
		// writer here (best effort)
		w.Close()
		return err
	}
	defer func() {
		// closing the writer is best effort; a bolt close failure is
		// surfaced unless an earlier error is already being returned
		w.Close()
		if cerr := copyBolt.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// start a write transaction
	tx, err := copyBolt.Begin(true)
	if err != nil {
		return err
	}

	_, _, err = prepareBoltSnapshot(is, tx, "", is.parent.segPlugin, nil, d)
	if err != nil {
		// the rollback result is irrelevant once the backup has failed
		_ = tx.Rollback()
		return fmt.Errorf("error backing up index snapshot: %v", err)
	}

	// commit bolt data
	err = tx.Commit()
	if err != nil {
		return fmt.Errorf("error commit tx to backup root bolt: %v", err)
	}

	return copyBolt.Sync()
}
// UpdateIOStats atomically adds val to the parent index's
// TotBytesReadAtQueryTime counter.
func (is *IndexSnapshot) UpdateIOStats(val uint64) {
	counter := &is.parent.stats.TotBytesReadAtQueryTime
	atomic.AddUint64(counter, val)
}
// GetSpatialAnalyzerPlugin returns the spatial plugin configured on the
// parent index, read while holding the snapshot mutex. typ only appears in
// the error returned when no plugin is set.
func (is *IndexSnapshot) GetSpatialAnalyzerPlugin(typ string) (
	index.SpatialAnalyzerPlugin, error,
) {
	is.m.Lock()
	var plugin index.SpatialAnalyzerPlugin = is.parent.spatialPlugin
	is.m.Unlock()

	if plugin != nil {
		return plugin, nil
	}
	return nil, fmt.Errorf("no spatial plugin type: %s found", typ)
}
// CloseCopyReader releases the backup marks this snapshot holds on its
// segments and then closes the snapshot.
//
// Under the parent's root lock, the copyScheduled counter of every segment
// file is decremented and the entry is removed once it reaches zero or
// below.
func (is *IndexSnapshot) CloseCopyReader() error {
	is.parent.rootLock.Lock()
	for _, snap := range is.segment {
		var fileName string
		switch seg := snap.segment.(type) {
		case segment.PersistedSegment:
			// already on disk: use the name of the existing file
			fileName = filepath.Base(seg.Path())
		default:
			// not persisted: use the name the segment file would get if
			// the segment is persisted in the future
			fileName = zapFileName(snap.id)
		}

		is.parent.copyScheduled[fileName]--
		if is.parent.copyScheduled[fileName] <= 0 {
			delete(is.parent.copyScheduled, fileName)
		}
	}
	is.parent.rootLock.Unlock()

	// with the marks released, close the index snapshot normally
	return is.Close()
}
// ThesaurusTermReader returns a reader over the synonyms of term in the
// named thesaurus. For every segment implementing segment.ThesaurusSegment
// it loads the thesaurus, its synonyms list for term (honouring the
// segment's deleted set) and an iterator over that list; other segments
// keep nil slots. ctx is not used by this method.
func (is *IndexSnapshot) ThesaurusTermReader(ctx context.Context, thesaurusName string, term []byte) (index.ThesaurusTermReader, error) {
	segCount := len(is.segment)
	reader := &IndexSnapshotThesaurusTermReader{
		name:          thesaurusName,
		snapshot:      is,
		postings:      make([]segment.SynonymsList, segCount),
		iterators:     make([]segment.SynonymsIterator, segCount),
		thesauri:      make([]segment.Thesaurus, segCount),
		segmentOffset: 0,
	}

	for idx, snap := range is.segment {
		thesSeg, ok := snap.segment.(segment.ThesaurusSegment)
		if !ok {
			continue
		}
		thes, err := thesSeg.Thesaurus(thesaurusName)
		if err != nil {
			return nil, err
		}
		reader.thesauri[idx] = thes

		synonyms, err := reader.thesauri[idx].SynonymsList(term, snap.deleted, reader.postings[idx])
		if err != nil {
			return nil, err
		}
		reader.postings[idx] = synonyms
		reader.iterators[idx] = synonyms.Iterator(reader.iterators[idx])
	}
	return reader, nil
}
// newIndexSnapshotThesaurusKeys builds a merged iterator over the keys of
// the named thesaurus across all segments of the snapshot.
//
// Each segment is handled in its own goroutine: segments implementing
// segment.ThesaurusSegment load the thesaurus and build an iterator with
// makeItr, other segments contribute nothing. Every iterator that yields a
// first entry becomes a cursor of the result.
//
// All results are always drained before returning; the first error seen
// (from loading a thesaurus or from reading a first entry) is returned.
func (is *IndexSnapshot) newIndexSnapshotThesaurusKeys(name string,
	makeItr func(i segment.Thesaurus) segment.ThesaurusIterator,
) (*IndexSnapshotThesaurusKeys, error) {
	// buffered to the number of segments so no producer ever blocks
	results := make(chan *asynchSegmentResult, len(is.segment))
	var wg sync.WaitGroup
	wg.Add(len(is.segment))
	for _, s := range is.segment {
		go func(s *SegmentSnapshot) {
			defer wg.Done()
			if synSeg, ok := s.segment.(segment.ThesaurusSegment); ok {
				thes, err := synSeg.Thesaurus(name)
				if err != nil {
					results <- &asynchSegmentResult{err: err}
				} else {
					results <- &asynchSegmentResult{thesItr: makeItr(thes)}
				}
			}
		}(s)
	}
	// Close the channel after all goroutines complete
	go func() {
		wg.Wait()
		close(results)
	}()

	var err error
	rv := &IndexSnapshotThesaurusKeys{
		snapshot: is,
		cursors:  make([]*segmentThesCursor, 0, len(is.segment)),
	}
	for asr := range results {
		if asr.err != nil {
			// A failed result carries no iterator, so it must never reach
			// the Next() call below; only the first error is kept.
			if err == nil {
				err = asr.err
			}
			continue
		}
		next, err2 := asr.thesItr.Next()
		if err2 != nil && err == nil {
			err = err2
		}
		if next != nil {
			rv.cursors = append(rv.cursors, &segmentThesCursor{
				itr:  asr.thesItr,
				curr: *next,
			})
		}
	}
	// after ensuring we've read all items on channel
	if err != nil {
		return nil, err
	}

	// Cursors arrive in goroutine-completion order, but Next() treats
	// cursors[0] as the smallest term and maintains the slice with
	// heap.Fix/heap.Pop. A slice sorted by Less is a valid min-heap, so
	// sorting establishes that invariant deterministically.
	sort.Sort(rv)

	return rv, nil
}
// ThesaurusKeys returns an iterator over every key of the named thesaurus
// across all segments of the snapshot.
//
// On failure the returned interface is a true nil: the concrete nil pointer
// is not passed straight through, because wrapping it in the interface
// would make it compare unequal to nil.
func (is *IndexSnapshot) ThesaurusKeys(name string) (index.ThesaurusKeys, error) {
	keys, err := is.newIndexSnapshotThesaurusKeys(name, func(thes segment.Thesaurus) segment.ThesaurusIterator {
		// no automaton and no bounds: iterate everything
		return thes.AutomatonIterator(nil, nil, nil)
	})
	if err != nil {
		return nil, err
	}
	return keys, nil
}
// ThesaurusKeysFuzzy returns an iterator over the keys of the named
// thesaurus accepted by the Levenshtein automaton built for term with the
// given fuzziness. A non-empty prefix additionally restricts the iteration
// to keys between prefix and its exclusive end.
//
// fuzziness is validated before it is narrowed to uint8, so values outside
// 0..255 are rejected instead of silently wrapping into an accepted value.
// On failure the returned interface is a true nil rather than a wrapped nil
// pointer.
func (is *IndexSnapshot) ThesaurusKeysFuzzy(name string,
	term string, fuzziness int, prefix string,
) (index.ThesaurusKeys, error) {
	if fuzziness < 0 || fuzziness > 255 {
		return nil, fmt.Errorf("invalid fuzziness: %d", fuzziness)
	}
	a, err := is.getLevAutomaton(term, uint8(fuzziness))
	if err != nil {
		return nil, err
	}

	var prefixBeg, prefixEnd []byte
	if prefix != "" {
		prefixBeg = []byte(prefix)
		prefixEnd = calculateExclusiveEndFromPrefix(prefixBeg)
	}

	keys, err := is.newIndexSnapshotThesaurusKeys(name, func(thes segment.Thesaurus) segment.ThesaurusIterator {
		return thes.AutomatonIterator(a, prefixBeg, prefixEnd)
	})
	if err != nil {
		return nil, err
	}
	return keys, nil
}
// ThesaurusKeysPrefix returns an iterator over the keys of the named
// thesaurus that lie between termPrefix and the exclusive end computed from
// it.
//
// On failure the returned interface is a true nil rather than a wrapped nil
// pointer.
func (is *IndexSnapshot) ThesaurusKeysPrefix(name string,
	termPrefix []byte,
) (index.ThesaurusKeys, error) {
	termPrefixEnd := calculateExclusiveEndFromPrefix(termPrefix)

	keys, err := is.newIndexSnapshotThesaurusKeys(name, func(thes segment.Thesaurus) segment.ThesaurusIterator {
		return thes.AutomatonIterator(nil, termPrefix, termPrefixEnd)
	})
	if err != nil {
		return nil, err
	}
	return keys, nil
}
// ThesaurusKeysRegexp returns an iterator over the keys of the named
// thesaurus accepted by the automaton and bounds that parseRegexp derives
// from termRegex.
//
// On failure the returned interface is a true nil rather than a wrapped nil
// pointer.
func (is *IndexSnapshot) ThesaurusKeysRegexp(name string,
	termRegex string,
) (index.ThesaurusKeys, error) {
	a, prefixBeg, prefixEnd, err := parseRegexp(termRegex)
	if err != nil {
		return nil, err
	}

	keys, err := is.newIndexSnapshotThesaurusKeys(name, func(thes segment.Thesaurus) segment.ThesaurusIterator {
		return thes.AutomatonIterator(a, prefixBeg, prefixEnd)
	})
	if err != nil {
		return nil, err
	}
	return keys, nil
}
// UpdateSynonymSearchCount atomically adds delta to the parent index's
// TotSynonymSearches counter.
func (is *IndexSnapshot) UpdateSynonymSearchCount(delta uint64) {
	counter := &is.parent.stats.TotSynonymSearches
	atomic.AddUint64(counter, delta)
}
// UpdateFieldsInfo merges updatedFields into the snapshot's own updated
// field information and then hands the merged result to every segment
// snapshot. The snapshot mutex is held for the whole operation.
func (is *IndexSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
	is.m.Lock()
	defer is.m.Unlock()

	is.MergeUpdateFieldsInfo(updatedFields)
	for idx := range is.segment {
		is.segment[idx].UpdateFieldsInfo(is.updatedFields)
	}
}
// MergeUpdateFieldsInfo folds updatedFields into the snapshot's existing
// updated field information. When the snapshot has none yet, the given map
// is adopted as is. Otherwise unknown fields are added, and for known
// fields each flag (Deleted, Index, DocValues, Store) becomes the logical
// OR of the existing and the incoming value.
func (is *IndexSnapshot) MergeUpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
	if is.updatedFields == nil {
		is.updatedFields = updatedFields
		return
	}
	for fieldName, incoming := range updatedFields {
		existing, known := is.updatedFields[fieldName]
		if !known {
			is.updatedFields[fieldName] = incoming
			continue
		}
		existing.Deleted = existing.Deleted || incoming.Deleted
		existing.Index = existing.Index || incoming.Index
		existing.DocValues = existing.DocValues || incoming.DocValues
		existing.Store = existing.Store || incoming.Store
	}
}
// TermFrequencies returns up to limit terms of the given field together
// with their frequencies aggregated across all segments of the snapshot.
//
// Terms are ordered by frequency (highest first when descending is true,
// lowest first otherwise); equal frequencies are ordered by term, always
// ascending. A snapshot without segments yields (nil, nil); a non-positive
// limit is an error.
func (is *IndexSnapshot) TermFrequencies(field string, limit int, descending bool) (
	termFreqs []index.TermFreq, err error) {
	switch {
	case len(is.segment) == 0:
		return nil, nil
	case limit <= 0:
		return nil, fmt.Errorf("limit must be positive")
	}

	// the field dictionary already sums term counts over all segments
	dict, err := is.FieldDict(field)
	if err != nil {
		return nil, fmt.Errorf("failed to get field dictionary for field %s: %v", field, err)
	}
	defer dict.Close()

	// reserve room for every unique term the dictionary reports
	termFreqs = make([]index.TermFreq, 0, dict.Cardinality())
	for {
		entry, nextErr := dict.Next()
		if nextErr != nil {
			return nil, fmt.Errorf("error iterating field dictionary: %v", nextErr)
		}
		if entry == nil {
			// dictionary exhausted
			break
		}
		termFreqs = append(termFreqs, index.TermFreq{
			Term:      entry.Term,
			Frequency: entry.Count,
		})
	}

	sort.Slice(termFreqs, func(a, b int) bool {
		left, right := termFreqs[a], termFreqs[b]
		switch {
		case left.Frequency == right.Frequency:
			// ties are broken lexicographically by term
			return left.Term < right.Term
		case descending:
			return left.Frequency > right.Frequency
		default:
			return left.Frequency < right.Frequency
		}
	})

	if limit < len(termFreqs) {
		termFreqs = termFreqs[:limit]
	}
	return termFreqs, nil
}
// Ancestors returns the ancestor IDs of the document identified by ID,
// expressed as global (snapshot-wide) doc numbers. The prealloc slice may
// be supplied to avoid allocations downstream, and MUST be empty.
func (is *IndexSnapshot) Ancestors(ID index.IndexInternalID, prealloc []index.AncestorID) ([]index.AncestorID, error) {
	// resolve the ID to its segment and segment-local doc number
	segIdx, localDocNum, err := is.segmentIndexAndLocalDocNum(ID)
	if err != nil {
		return nil, err
	}

	// the segment reports ancestors as segment-local numbers
	ancestors := is.segment[segIdx].Ancestors(localDocNum, prealloc)

	// shift each of them by the segment's global offset
	base := is.offsets[segIdx]
	for pos := range ancestors {
		ancestors[pos] = ancestors[pos].Add(base)
	}
	return ancestors, nil
}
================================================
FILE: index/scorch/snapshot_index_dict.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"container/heap"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// segmentDictCursor tracks the iteration position within a single
// segment's term dictionary.
type segmentDictCursor struct {
// dictionary consulted by IndexSnapshotFieldDict.Contains
dict segment.TermDictionary
// iterator supplying this cursor's entries
itr segment.DictionaryIterator
// entry most recently read from itr
curr index.DictEntry
}
// IndexSnapshotFieldDict merges per-segment dictionary cursors into a
// single stream of entries. Its methods implement heap.Interface over
// cursors, ordered by each cursor's current term.
type IndexSnapshotFieldDict struct {
// value reported by Cardinality
cardinality int
// value reported by BytesRead
bytesRead uint64
snapshot *IndexSnapshot
// one cursor per segment iterator that still has entries
cursors []*segmentDictCursor
// entry whose address Next returns; overwritten on every call
entry index.DictEntry
}
// BytesRead returns the byte count recorded on this field dictionary.
func (i *IndexSnapshotFieldDict) BytesRead() uint64 {
	read := i.bytesRead
	return read
}
// Len reports the number of cursors (heap.Interface).
func (i *IndexSnapshotFieldDict) Len() int {
	return len(i.cursors)
}

// Less orders cursors by their current term (heap.Interface).
func (i *IndexSnapshotFieldDict) Less(a, b int) bool {
	left, right := i.cursors[a], i.cursors[b]
	return left.curr.Term < right.curr.Term
}

// Swap exchanges the cursors at positions a and b (heap.Interface).
func (i *IndexSnapshotFieldDict) Swap(a, b int) {
	held := i.cursors[a]
	i.cursors[a] = i.cursors[b]
	i.cursors[b] = held
}

// Push appends x, which must be a *segmentDictCursor (heap.Interface).
func (i *IndexSnapshotFieldDict) Push(x interface{}) {
	cursor := x.(*segmentDictCursor)
	i.cursors = append(i.cursors, cursor)
}

// Pop removes and returns the last cursor (heap.Interface).
func (i *IndexSnapshotFieldDict) Pop() interface{} {
	last := len(i.cursors) - 1
	popped := i.cursors[last]
	i.cursors = i.cursors[:last]
	return popped
}
// Next returns the next dictionary entry in term order, or (nil, nil) when
// all cursors are exhausted. When several cursors sit on the same term,
// their counts are summed into one entry. The returned pointer refers to
// storage that is overwritten by the following call.
func (i *IndexSnapshotFieldDict) Next() (*index.DictEntry, error) {
	if len(i.cursors) == 0 {
		return nil, nil
	}

	// advanceTop moves the cursor at the top of the heap to its next
	// entry, dropping the cursor when it is exhausted and restoring the
	// heap order otherwise.
	advanceTop := func() error {
		top := i.cursors[0]
		next, err := top.itr.Next()
		if err != nil {
			return err
		}
		if next == nil {
			heap.Pop(i)
			return nil
		}
		top.curr = *next
		heap.Fix(i, 0)
		return nil
	}

	i.entry = i.cursors[0].curr
	if err := advanceTop(); err != nil {
		return nil, err
	}

	// fold in every other cursor currently positioned on the same term
	for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
		i.entry.Count += i.cursors[0].curr.Count
		if err := advanceTop(); err != nil {
			return nil, err
		}
	}
	return &i.entry, nil
}
// Cardinality returns the cardinality recorded on this field dictionary.
func (i *IndexSnapshotFieldDict) Cardinality() int {
	count := i.cardinality
	return count
}
// Close is a no-op; it always returns nil.
func (i *IndexSnapshotFieldDict) Close() (err error) {
	return err
}
// Contains reports whether key is present in the dictionary of any cursor
// that is still active. Errors from the individual dictionary lookups are
// ignored, so the returned error is always nil.
func (i *IndexSnapshotFieldDict) Contains(key []byte) (bool, error) {
	for idx := range i.cursors {
		found, _ := i.cursors[idx].dict.Contains(key)
		if found {
			return true, nil
		}
	}
	// no cursors at all, or none of their dictionaries has the key
	return false, nil
}
================================================
FILE: index/scorch/snapshot_index_doc.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"reflect"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeIndexSnapshotDocIDReader holds the static size, in bytes,
// of an IndexSnapshotDocIDReader value as reported by reflection.
var reflectStaticSizeIndexSnapshotDocIDReader int

func init() {
	readerType := reflect.TypeOf(IndexSnapshotDocIDReader{})
	reflectStaticSizeIndexSnapshotDocIDReader = int(readerType.Size())
}
// IndexSnapshotDocIDReader iterates document numbers segment by segment
// and reports them as global internal IDs.
type IndexSnapshotDocIDReader struct {
// snapshot whose offsets turn segment-local numbers into global ones
snapshot *IndexSnapshot
// iterators drained in order; position N uses snapshot.offsets[N]
iterators []roaring.IntIterable
// index of the iterator currently being drained
segmentOffset int
}
// Size returns the static struct size plus the size of one pointer.
func (i *IndexSnapshotDocIDReader) Size() int {
	return size.SizeOfPtr + reflectStaticSizeIndexSnapshotDocIDReader
}
// Next returns the next document as a global internal ID, moving on to the
// following segment iterator whenever the current one is exhausted. It
// returns (nil, nil) once every iterator is drained.
func (i *IndexSnapshotDocIDReader) Next() (index.IndexInternalID, error) {
	for ; i.segmentOffset < len(i.iterators); i.segmentOffset++ {
		itr := i.iterators[i.segmentOffset]
		if !itr.HasNext() {
			continue
		}
		local := itr.Next()
		// the segment's offset turns the local number into a global one
		base := i.snapshot.offsets[i.segmentOffset]
		return index.NewIndexInternalID(nil, uint64(local)+base), nil
	}
	return nil, nil
}
// Advance returns the first remaining ID that is not smaller than ID, or
// (nil, nil) when the reader runs out first. At least one ID is always
// consumed, and the scan is linear via Next.
func (i *IndexSnapshotDocIDReader) Advance(ID index.IndexInternalID) (index.IndexInternalID, error) {
	// FIXME do something better
	candidate, err := i.Next()
	for err == nil && candidate != nil && candidate.Compare(ID) < 0 {
		candidate, err = i.Next()
	}
	if err != nil {
		return nil, err
	}
	return candidate, nil
}
// Close is a no-op; it always returns nil.
func (i *IndexSnapshotDocIDReader) Close() (err error) {
	return err
}
================================================
FILE: index/scorch/snapshot_index_str.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"reflect"
"github.com/blevesearch/bleve/v2/size"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// reflectStaticSizeIndexSnapshotThesaurusTermReader holds the static size,
// in bytes, of an IndexSnapshotThesaurusTermReader value as reported by
// reflection.
var reflectStaticSizeIndexSnapshotThesaurusTermReader int

func init() {
	readerType := reflect.TypeOf(IndexSnapshotThesaurusTermReader{})
	reflectStaticSizeIndexSnapshotThesaurusTermReader = int(readerType.Size())
}
// IndexSnapshotThesaurusTermReader iterates the synonyms of one term,
// segment by segment.
type IndexSnapshotThesaurusTermReader struct {
// thesaurus name; its length is counted by Size
name string
snapshot *IndexSnapshot
// per-segment slices; Size and Next tolerate nil slots
thesauri []segment.Thesaurus
postings []segment.SynonymsList
iterators []segment.SynonymsIterator
// index of the iterator currently being drained by Next
segmentOffset int
}
// Size estimates the reader's memory footprint in bytes: the static struct
// size, one pointer, the name, and the reported size of every non-nil
// postings list and iterator.
func (i *IndexSnapshotThesaurusTermReader) Size() int {
	total := reflectStaticSizeIndexSnapshotThesaurusTermReader + size.SizeOfPtr +
		len(i.name) + size.SizeOfString

	for idx := range i.postings {
		if list := i.postings[idx]; list != nil {
			total += list.Size()
		}
	}
	for idx := range i.iterators {
		if itr := i.iterators[idx]; itr != nil {
			total += itr.Size()
		}
	}
	return total
}
// Next returns the next synonym term, walking the per-segment iterators in
// order and skipping nil or exhausted ones. It returns ("", nil) once all
// iterators are drained.
func (i *IndexSnapshotThesaurusTermReader) Next() (string, error) {
	for ; i.segmentOffset < len(i.iterators); i.segmentOffset++ {
		itr := i.iterators[i.segmentOffset]
		if itr == nil {
			continue
		}
		hit, err := itr.Next()
		if err != nil {
			return "", err
		}
		if hit != nil {
			return hit.Term(), nil
		}
		// this segment is exhausted; the loop moves to the next one
	}
	return "", nil
}
// Close is a no-op; it always returns nil.
func (i *IndexSnapshotThesaurusTermReader) Close() (err error) {
	return err
}
================================================
FILE: index/scorch/snapshot_index_test.go
================================================
package scorch
import (
"testing"
"github.com/blevesearch/vellum"
)
// TestIndexSnapshot_getLevAutomaton checks that getLevAutomaton yields an
// automaton for fuzziness 1 and 2 (including for an empty term) and fails
// with the max-limit error for fuzziness 0 and 3.
func TestIndexSnapshot_getLevAutomaton(t *testing.T) {
	type testCase struct {
		name        string
		term        string
		fuzziness   uint8
		expectError bool
		errorMsg    string // optional: exact error text to expect
	}

	const limitErr = "fuzziness exceeds the max limit"

	// A bare IndexSnapshot is enough (parent doesn't matter for this method)
	is := &IndexSnapshot{}

	cases := []testCase{
		{name: "fuzziness 1", term: "test", fuzziness: 1},
		{name: "fuzziness 2", term: "another", fuzziness: 2},
		{name: "fuzziness 0", term: "zero", fuzziness: 0, expectError: true, errorMsg: limitErr},
		{name: "fuzziness 3", term: "three", fuzziness: 3, expectError: true, errorMsg: limitErr},
		{name: "empty term fuzziness 1", term: "", fuzziness: 1},
		{name: "empty term fuzziness 2", term: "", fuzziness: 2},
	}

	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			gotAutomaton, err := is.getLevAutomaton(tc.term, tc.fuzziness)

			if !tc.expectError {
				if err != nil {
					t.Errorf("getLevAutomaton() got unexpected error: %v", err)
				}
				if gotAutomaton == nil {
					t.Errorf("getLevAutomaton() expected a valid automaton but got nil")
				}
				// the result must also satisfy vellum.Automaton
				if _, ok := gotAutomaton.(vellum.Automaton); !ok {
					t.Errorf("getLevAutomaton() returned type is not vellum.Automaton")
				}
				return
			}

			switch {
			case err == nil:
				t.Errorf("getLevAutomaton() expected an error but got nil")
			case tc.errorMsg != "" && err.Error() != tc.errorMsg:
				t.Errorf("getLevAutomaton() expected error msg %q but got %q", tc.errorMsg, err.Error())
			}
			if gotAutomaton != nil {
				t.Errorf("getLevAutomaton() expected nil automaton on error but got %v", gotAutomaton)
			}
		})
	}
}
// Add other tests for snapshot_index.go below if needed...
================================================
FILE: index/scorch/snapshot_index_tfr.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"context"
"fmt"
"reflect"
"sync/atomic"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// reflectStaticSizeIndexSnapshotTermFieldReader holds the static size, in
// bytes, of an IndexSnapshotTermFieldReader value as reported by reflection.
var reflectStaticSizeIndexSnapshotTermFieldReader int

func init() {
	readerType := reflect.TypeOf(IndexSnapshotTermFieldReader{})
	reflectStaticSizeIndexSnapshotTermFieldReader = int(readerType.Size())
}
// IndexSnapshotTermFieldReader iterates the postings of one term in one
// field across the segments of an index snapshot.
type IndexSnapshotTermFieldReader struct {
// term and field being read; Advance reuses both when it has to
// re-create the reader in order to seek backwards
term []byte
field string
snapshot *IndexSnapshot
// per-segment state; iterators is indexed by segmentOffset
dicts []segment.TermDictionary
postings []segment.PostingsList
iterators []segment.PostingsIterator
// index of the segment iterator currently in use
segmentOffset int
// which optional parts of a posting are copied into returned docs
includeFreq bool
includeNorm bool
includeTermVectors bool
// most recent posting returned and its global ID
currPosting segment.Posting
currID index.IndexInternalID
recycle bool
// bytes read by this reader; reported through ctx when it is closed
bytesRead uint64
ctx context.Context
// set for the unadorned composite optimization: such a reader has no
// valid term or field and is rewound by resetting its iterators
unadorned bool
// flag to indicate whether to increment our bytesRead
// value after creation of the TFR while iterating our postings
// lists
updateBytesRead bool
}
// incrementBytesRead adds val to the reader's bytesRead counter.
func (i *IndexSnapshotTermFieldReader) incrementBytesRead(val uint64) {
	i.bytesRead = i.bytesRead + val
}
// Size estimates the reader's memory footprint in bytes: the static struct
// size, one pointer, the term, field and current ID, plus the reported size
// of every postings list, every iterator and the current posting (if any).
func (i *IndexSnapshotTermFieldReader) Size() int {
	total := reflectStaticSizeIndexSnapshotTermFieldReader + size.SizeOfPtr +
		len(i.term) +
		len(i.field) +
		len(i.currID)

	for idx := range i.postings {
		total += i.postings[idx].Size()
	}
	for idx := range i.iterators {
		total += i.iterators[idx].Size()
	}
	if posting := i.currPosting; posting != nil {
		total += posting.Size()
	}
	return total
}
// Next returns the next hit for the term, walking the per-segment postings
// iterators in order, or (nil, nil) once all of them are exhausted. The
// result is written into preAlloced when it is non-nil, otherwise into a
// freshly allocated doc.
func (i *IndexSnapshotTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	doc := preAlloced
	if doc == nil {
		doc = &index.TermFieldDoc{}
	}

	for ; i.segmentOffset < len(i.iterators); i.segmentOffset++ {
		itr := i.iterators[i.segmentOffset]

		// snapshot the iterator's byte counter so the delta can be
		// attributed to this call
		var before uint64
		if i.updateBytesRead {
			before = itr.BytesRead()
		}

		hit, err := itr.Next()
		if err != nil {
			return nil, err
		}
		if hit == nil {
			// segment exhausted; continue with the next one
			continue
		}

		// the segment's offset turns the local number into a global one
		base := i.snapshot.offsets[i.segmentOffset]
		local := hit.Number()
		doc.ID = index.NewIndexInternalID(doc.ID, local+base)
		i.postingToTermFieldDoc(hit, doc)
		i.currID = doc.ID
		i.currPosting = hit

		if i.updateBytesRead {
			// The postings iterator keeps its bytesRead stat cumulatively,
			// because a single step may involve several loadChunk calls;
			// only the growth since the snapshot above is reported.
			if after := itr.BytesRead(); after > before {
				i.incrementBytesRead(after - before)
			}
		}
		return doc, nil
	}
	return nil, nil
}
// postingToTermFieldDoc copies the parts of posting next that this reader
// was asked to include (frequency, norm, term vectors) into rv. The
// rv.Vectors slice is reused when its capacity suffices and re-created
// otherwise.
func (i *IndexSnapshotTermFieldReader) postingToTermFieldDoc(next segment.Posting, rv *index.TermFieldDoc) {
	if i.includeFreq {
		rv.Freq = next.Frequency()
	}
	if i.includeNorm {
		rv.Norm = next.Norm()
	}
	if !i.includeTermVectors {
		return
	}

	locs := next.Locations()
	needed := len(locs)
	if cap(rv.Vectors) < needed {
		// one backing array for all vectors keeps this to two allocations
		rv.Vectors = make([]*index.TermFieldVector, needed)
		backing := make([]index.TermFieldVector, needed)
		for idx := range backing {
			rv.Vectors[idx] = &backing[idx]
		}
	}
	rv.Vectors = rv.Vectors[:needed]

	for idx, loc := range locs {
		*rv.Vectors[idx] = index.TermFieldVector{
			Start:          loc.Start(),
			End:            loc.End(),
			Pos:            loc.Pos(),
			ArrayPositions: loc.ArrayPositions(),
			Field:          loc.Field(),
		}
	}
}
// Advance positions the reader on the first hit whose ID is not smaller
// than ID and returns it, or (nil, nil) when no such hit remains.
//
// If the reader is already at or past ID it is rewound first: a regular
// reader is closed and replaced in place by a fresh reader for the same
// term and field, an unadorned reader has its iterators reset. The target
// segment is then computed from ID and its iterator is advanced directly;
// if that segment has no hit, Next continues from there.
//
// NOTE(review): the replacement reader is created with context.TODO(), not
// with i.ctx - confirm that dropping the original context is intended.
// NOTE(review): unlike Next, the direct Advance path below does not add to
// bytesRead - confirm whether that is intended.
func (i *IndexSnapshotTermFieldReader) Advance(ID index.IndexInternalID, preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
// FIXME do something better
// for now, if we need to seek backwards, then restart from the beginning
if i.currPosting != nil && i.currID.Compare(ID) >= 0 {
// Check if the TFR is a special unadorned composite optimization.
// Such a TFR will NOT have a valid `term` or `field` set, making it
// impossible for the TFR to replace itself with a new one.
if !i.unadorned {
i2, err := i.snapshot.TermFieldReader(context.TODO(), i.term, i.field,
i.includeFreq, i.includeNorm, i.includeTermVectors)
if err != nil {
return nil, err
}
// close the current term field reader before replacing it with a new one
_ = i.Close()
// overwrite this reader's entire state with the fresh reader's state
*i = *(i2.(*IndexSnapshotTermFieldReader))
} else {
// unadorned composite optimization
// we need to reset all the iterators
// back to the beginning, which effectively
// achieves the same thing as the above
for _, iter := range i.iterators {
if optimizedIterator, ok := iter.(ResetablePostingsIterator); ok {
optimizedIterator.ResetIterator()
}
}
}
}
num, err := ID.Value()
if err != nil {
return nil, fmt.Errorf("error converting to doc number % x - %v", ID, err)
}
segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
if segIndex >= len(i.snapshot.segment) {
return nil, fmt.Errorf("computed segment index %d out of bounds %d",
segIndex, len(i.snapshot.segment))
}
// skip directly to the target segment
i.segmentOffset = segIndex
next, err := i.iterators[i.segmentOffset].Advance(ldocNum)
if err != nil {
return nil, err
}
if next == nil {
// we jumped directly to the segment that should have contained it
// but it wasn't there, so reuse Next() which should correctly
// get the next hit after it (we moved i.segmentOffset)
return i.Next(preAlloced)
}
if preAlloced == nil {
preAlloced = &index.TermFieldDoc{}
}
// convert the segment-local number to a global one using the segment offset
preAlloced.ID = index.NewIndexInternalID(preAlloced.ID, next.Number()+
i.snapshot.offsets[segIndex])
i.postingToTermFieldDoc(next, preAlloced)
i.currID = preAlloced.ID
i.currPosting = next
return preAlloced, nil
}
// Count returns the sum of the counts reported by all per-segment postings
// lists.
func (i *IndexSnapshotTermFieldReader) Count() uint64 {
	var total uint64
	for idx := range i.postings {
		total += i.postings[idx].Count()
	}
	return total
}
// Close reports the reader's bytesRead through its context (when one is
// set), bumps the parent's TotTermSearchersFinished counter and offers the
// reader to the snapshot for recycling. It always returns nil.
func (i *IndexSnapshotTermFieldReader) Close() error {
	if ctx := i.ctx; ctx != nil {
		// the bytes read must be reported before the reader goes away
		if callback := ctx.Value(search.SearchIOStatsCallbackKey); callback != nil {
			callback.(search.SearchIOStatsCallbackFunc)(i.bytesRead)
		}
		search.RecordSearchCost(ctx, search.AddM, i.bytesRead)
	}

	if i.snapshot == nil {
		return nil
	}
	atomic.AddUint64(&i.snapshot.parent.stats.TotTermSearchersFinished, 1)
	i.snapshot.recycleTermFieldReader(i)
	return nil
}
================================================
FILE: index/scorch/snapshot_index_thes.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"container/heap"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// segmentThesCursor tracks the iteration position within a single
// segment's thesaurus.
type segmentThesCursor struct {
// thesaurus consulted by IndexSnapshotThesaurusKeys.Contains
thes segment.Thesaurus
// iterator supplying this cursor's entries
itr segment.ThesaurusIterator
// entry most recently read from itr
curr index.ThesaurusEntry
}
// IndexSnapshotThesaurusKeys merges per-segment thesaurus cursors into a
// single stream of entries. Its methods implement heap.Interface over
// cursors, ordered by each cursor's current term.
type IndexSnapshotThesaurusKeys struct {
snapshot *IndexSnapshot
// one cursor per segment iterator that still has entries
cursors []*segmentThesCursor
// entry whose address Next returns; overwritten on every call
entry index.ThesaurusEntry
}
// Len reports the number of cursors (heap.Interface).
func (i *IndexSnapshotThesaurusKeys) Len() int {
	return len(i.cursors)
}

// Less orders cursors by their current term (heap.Interface).
func (i *IndexSnapshotThesaurusKeys) Less(a, b int) bool {
	left, right := i.cursors[a], i.cursors[b]
	return left.curr.Term < right.curr.Term
}

// Swap exchanges the cursors at positions a and b (heap.Interface).
func (i *IndexSnapshotThesaurusKeys) Swap(a, b int) {
	held := i.cursors[a]
	i.cursors[a] = i.cursors[b]
	i.cursors[b] = held
}

// Push appends x, which must be a *segmentThesCursor (heap.Interface).
func (i *IndexSnapshotThesaurusKeys) Push(x interface{}) {
	cursor := x.(*segmentThesCursor)
	i.cursors = append(i.cursors, cursor)
}

// Pop removes and returns the last cursor (heap.Interface).
func (i *IndexSnapshotThesaurusKeys) Pop() interface{} {
	last := len(i.cursors) - 1
	popped := i.cursors[last]
	i.cursors = i.cursors[:last]
	return popped
}
// Next returns the next thesaurus entry in term order, or (nil, nil) when
// all cursors are exhausted. Cursors positioned on the same term are
// advanced together so that each term is reported once. The returned
// pointer refers to storage that is overwritten by the following call.
func (i *IndexSnapshotThesaurusKeys) Next() (*index.ThesaurusEntry, error) {
	if len(i.cursors) == 0 {
		return nil, nil
	}

	// advanceTop moves the cursor at the top of the heap to its next
	// entry, dropping the cursor when it is exhausted and restoring the
	// heap order otherwise.
	advanceTop := func() error {
		top := i.cursors[0]
		next, err := top.itr.Next()
		if err != nil {
			return err
		}
		if next == nil {
			heap.Pop(i)
			return nil
		}
		top.curr = *next
		heap.Fix(i, 0)
		return nil
	}

	i.entry = i.cursors[0].curr
	if err := advanceTop(); err != nil {
		return nil, err
	}

	// skip over every other cursor currently positioned on the same term
	for len(i.cursors) > 0 && i.cursors[0].curr.Term == i.entry.Term {
		if err := advanceTop(); err != nil {
			return nil, err
		}
	}
	return &i.entry, nil
}
// Close is a no-op; it always returns nil.
func (i *IndexSnapshotThesaurusKeys) Close() (err error) {
	return err
}
// Contains reports whether key is present in the thesaurus of any cursor
// that is still active.
//
// Cursors without a thesaurus attached are skipped instead of being
// dereferenced, since a cursor may be created with only its iterator and
// current entry set. If the key is found nowhere and at least one lookup
// failed, the first such error is returned alongside false, so a failed
// lookup is not mistaken for a definite miss.
func (i *IndexSnapshotThesaurusKeys) Contains(key []byte) (bool, error) {
	var firstErr error
	for _, cursor := range i.cursors {
		if cursor == nil || cursor.thes == nil {
			// nothing to consult for this cursor
			continue
		}
		found, err := cursor.thes.Contains(key)
		if found {
			return true, nil
		}
		if err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return false, firstErr
}
================================================
FILE: index/scorch/snapshot_index_vr.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package scorch
import (
"context"
"encoding/json"
"fmt"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
segment_api "github.com/blevesearch/scorch_segment_api/v2"
)
// VectorSearchSupportedSegmentVersion is the segment version constant
// associated with vector search support.
const VectorSearchSupportedSegmentVersion = 16

// reflectStaticSizeIndexSnapshotVectorReader holds the static size, in
// bytes, of an IndexSnapshotVectorReader value as reported by reflection.
var reflectStaticSizeIndexSnapshotVectorReader int

func init() {
	readerType := reflect.TypeOf(IndexSnapshotVectorReader{})
	reflectStaticSizeIndexSnapshotVectorReader = int(readerType.Size())
}
// IndexSnapshotVectorReader iterates vector search hits for one query
// vector in one field across the segments of an index snapshot.
type IndexSnapshotVectorReader struct {
// query parameters; Advance passes vector, field, k, searchParams and
// eligibleSelector back to snapshot.VectorReader when it has to
// re-create the reader
vector []float32
field string
k int64
snapshot *IndexSnapshot
// per-segment state; Next skips nil iterator slots
postings []segment_api.VecPostingsList
iterators []segment_api.VecPostingsIterator
// index of the segment iterator currently in use
segmentOffset int
// most recent posting returned and its global ID
currPosting segment_api.VecPosting
currID index.IndexInternalID
ctx context.Context
searchParams json.RawMessage
eligibleSelector index.EligibleDocumentSelector
}
// Size returns an estimate, in bytes, of the memory held by the reader: the
// static struct size plus the query vector, the field name, the current ID
// and whatever the per-segment postings lists, iterators and the current
// posting report for themselves.
//
// Slots in postings and iterators can be nil (Next explicitly skips nil
// iterators), so nil entries are skipped instead of being dereferenced.
func (i *IndexSnapshotVectorReader) Size() int {
	sizeInBytes := reflectStaticSizeIndexSnapshotVectorReader + size.SizeOfPtr +
		len(i.vector)*size.SizeOfFloat32 +
		len(i.field) +
		len(i.currID)

	for _, entry := range i.postings {
		if entry != nil {
			sizeInBytes += entry.Size()
		}
	}

	for _, entry := range i.iterators {
		if entry != nil {
			sizeInBytes += entry.Size()
		}
	}

	if i.currPosting != nil {
		sizeInBytes += i.currPosting.Size()
	}

	return sizeInBytes
}
// Next returns the next vector hit, walking the per-segment iterators in
// order from the current segment onwards. Segments without an iterator are
// skipped. The hit is written into preAlloced when it is non-nil, otherwise
// into a newly allocated VectorDoc. It returns (nil, nil) once every segment
// is exhausted.
func (i *IndexSnapshotVectorReader) Next(preAlloced *index.VectorDoc) (
	*index.VectorDoc, error) {
	doc := preAlloced
	if doc == nil {
		doc = &index.VectorDoc{}
	}

	for ; i.segmentOffset < len(i.iterators); i.segmentOffset++ {
		itr := i.iterators[i.segmentOffset]
		if itr == nil {
			continue
		}

		posting, err := itr.Next()
		if err != nil {
			return nil, err
		}
		if posting == nil {
			// this segment is exhausted, move on to the following one
			continue
		}

		// turn the segment-local number into a global one by adding the
		// segment's offset within the snapshot
		globalNum := posting.Number() + i.snapshot.offsets[i.segmentOffset]
		doc.ID = index.NewIndexInternalID(doc.ID, globalNum)
		doc.Score = float64(posting.Score())

		i.currID = doc.ID
		i.currPosting = posting
		return doc, nil
	}

	return nil, nil
}
// Advance positions the reader at the first hit whose ID is at or after ID
// and returns it, or (nil, nil) when there is no such hit.
//
// When the reader has already returned a hit at or beyond ID it cannot move
// backwards, so it is replaced by a fresh reader obtained from the snapshot
// before seeking. The hit is written into preAlloced when it is non-nil,
// otherwise into a newly allocated VectorDoc.
func (i *IndexSnapshotVectorReader) Advance(ID index.IndexInternalID,
	preAlloced *index.VectorDoc) (*index.VectorDoc, error) {
	if i.currPosting != nil && i.currID.Compare(ID) >= 0 {
		ctx := i.ctx
		i2, err := i.snapshot.VectorReader(ctx, i.vector, i.field, i.k,
			i.searchParams, i.eligibleSelector)
		if err != nil {
			return nil, err
		}
		// close the current vector reader before replacing it with a new one
		_ = i.Close()
		*i = *(i2.(*IndexSnapshotVectorReader))
		// keep the context across the reset regardless of whether the
		// freshly built reader carries one
		i.ctx = ctx
	}

	num, err := ID.Value()
	if err != nil {
		return nil, fmt.Errorf("error converting to doc number % x - %w", ID, err)
	}
	segIndex, ldocNum := i.snapshot.segmentIndexAndLocalDocNumFromGlobal(num)
	if segIndex >= len(i.snapshot.segment) {
		return nil, fmt.Errorf("computed segment index %d out of bounds %d",
			segIndex, len(i.snapshot.segment))
	}

	// skip directly to the target segment
	i.segmentOffset = segIndex

	itr := i.iterators[segIndex]
	if itr == nil {
		// no iterator for the target segment (Next treats that as "nothing
		// here"); let Next pick up the first hit in a later segment rather
		// than calling a method on a nil interface
		return i.Next(preAlloced)
	}

	next, err := itr.Advance(ldocNum)
	if err != nil {
		return nil, err
	}
	if next == nil {
		// we jumped directly to the segment that should have contained it
		// but it wasn't there, so reuse Next() which should correctly
		// get the next hit after it (we moved i.segmentOffset)
		return i.Next(preAlloced)
	}

	if preAlloced == nil {
		preAlloced = &index.VectorDoc{}
	}
	preAlloced.ID = index.NewIndexInternalID(preAlloced.ID, next.Number()+
		i.snapshot.offsets[segIndex])
	// report the score of this hit, exactly as Next does; otherwise a reused
	// preAlloced would carry the score of an earlier hit
	preAlloced.Score = float64(next.Score())

	i.currID = preAlloced.ID
	i.currPosting = next
	return preAlloced, nil
}
// Count returns the sum of the counts reported by the per-segment postings
// lists. Segments that have no postings list (nil slot) contribute nothing.
func (i *IndexSnapshotVectorReader) Count() uint64 {
	var rv uint64
	for _, posting := range i.postings {
		if posting == nil {
			// slot never populated for this segment; avoid a nil dereference
			continue
		}
		rv += posting.Count()
	}
	return rv
}
// Close ends use of the reader. Nothing is released at present, so it always
// returns nil.
func (i *IndexSnapshotVectorReader) Close() error {
	// TODO: consider whether there is any scope for recycling here.
	return nil
}
// CentroidCardinalities collects up to limit centroid cardinalities for field
// from every vector-capable segment of the snapshot, sorts the combined list
// by cardinality (largest first when descending is true, smallest first
// otherwise) and returns at most limit entries.
//
// It returns (nil, nil) when the snapshot has no segments or no centroids were
// found, and an error when limit is not positive or a segment's vector index
// cannot be read.
func (i *IndexSnapshot) CentroidCardinalities(field string, limit int, descending bool) (
	[]index.CentroidCardinality, error) {
	if len(i.segment) == 0 {
		return nil, nil
	}

	if limit <= 0 {
		return nil, fmt.Errorf("limit must be positive")
	}

	// Grow on demand: limit is caller-controlled, so reserving
	// limit*len(i.segment) entries up front can overflow int or request an
	// absurd allocation when a caller passes a very large limit.
	var centroids []index.CentroidCardinality

	for _, seg := range i.segment {
		sv, ok := seg.segment.(segment_api.VectorSegment)
		if !ok {
			continue
		}

		vecIndex, err := sv.InterpretVectorIndex(field, seg.deleted)
		if err != nil {
			return nil, fmt.Errorf("failed to interpret vector index for field %s in segment: %w", field, err)
		}

		centroidCardinalities, err := vecIndex.ObtainKCentroidCardinalitiesFromIVFIndex(limit, descending)
		if err != nil {
			return nil, fmt.Errorf("failed to obtain top k centroid cardinalities for field %s in segment: %w", field, err)
		}

		centroids = append(centroids, centroidCardinalities...)
	}

	if len(centroids) == 0 {
		return nil, nil
	}

	sort.Slice(centroids, func(a, b int) bool {
		if descending {
			return centroids[a].Cardinality > centroids[b].Cardinality
		}
		return centroids[a].Cardinality < centroids[b].Cardinality
	})

	if limit >= len(centroids) {
		return centroids, nil
	}

	return centroids[:limit], nil
}
================================================
FILE: index/scorch/snapshot_segment.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"bytes"
"os"
"sync"
"sync/atomic"
"github.com/RoaringBitmap/roaring/v2"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// SegmentSnapshot pairs a segment with the per-snapshot state kept alongside
// it: deletions, field stats, updated-field information and caches.
type SegmentSnapshot struct {
// this flag is needed to identify whether this
// segment was mmaped recently, in which case
// we consider the loading cost of the metadata
// as part of IO stats.
mmaped uint32
id uint64
segment segment.Segment
// doc numbers excluded by Count, DocNumbers and DocNumbersLive; may be nil
deleted *roaring.Bitmap
creator string
stats *fieldStats
// merged by UpdateFieldsInfo and handed to segments that accept it
updatedFields map[string]*index.UpdateFieldInfo
cachedMeta *cachedMeta
cachedDocs *cachedDocs
}
// Segment returns the underlying segment.
func (s *SegmentSnapshot) Segment() segment.Segment { return s.segment }

// Deleted returns the bitmap of deleted doc numbers, which may be nil.
func (s *SegmentSnapshot) Deleted() *roaring.Bitmap { return s.deleted }

// Id returns the identifier of this segment snapshot.
func (s *SegmentSnapshot) Id() uint64 { return s.id }

// FullSize returns the document count reported by the underlying segment,
// without subtracting deletions.
func (s *SegmentSnapshot) FullSize() int64 {
	total := s.segment.Count()
	return int64(total)
}

// LiveSize returns Count() as an int64, i.e. the document count after
// deletions are subtracted.
func (s *SegmentSnapshot) LiveSize() int64 {
	live := s.Count()
	return int64(live)
}
// HasVector reports whether the segment's stats contain at least one entry
// under "num_vectors" (the number of vectors, per vector field).
func (s *SegmentSnapshot) HasVector() bool {
	perFieldVectorCounts := s.stats.Fetch()["num_vectors"]
	return len(perFieldVectorCounts) != 0
}
// FileSize returns the size in bytes of the file backing a persisted segment.
// It returns 0 when the segment is not persisted, has no path, or the file
// cannot be stat'ed.
func (s *SegmentSnapshot) FileSize() int64 {
	persisted, isPersisted := s.segment.(segment.PersistedSegment)
	if !isPersisted {
		return 0
	}

	if p := persisted.Path(); p != "" {
		if info, err := os.Stat(p); err == nil {
			return info.Size()
		}
	}
	return 0
}
// Close closes the underlying segment.
func (s *SegmentSnapshot) Close() error { return s.segment.Close() }

// VisitDocument passes the stored fields of document num to visitor by
// delegating to the underlying segment.
func (s *SegmentSnapshot) VisitDocument(num uint64, visitor segment.StoredFieldValueVisitor) error {
	return s.segment.VisitStoredFields(num, visitor)
}

// DocID returns the external ID of document num as reported by the underlying
// segment.
func (s *SegmentSnapshot) DocID(num uint64) ([]byte, error) { return s.segment.DocID(num) }
// Count returns the number of documents in the segment minus the number of
// entries in the deleted bitmap, when there is one.
func (s *SegmentSnapshot) Count() uint64 {
	total := s.segment.Count()
	if s.deleted == nil {
		return total
	}
	return total - s.deleted.GetCardinality()
}
// CountRoot counts the live root documents in the segment. It differs from
// Count() in that Count() counts all live documents including nested
// children, whereas this method counts only live root documents. Segments
// that are not nested segments fall back to Count().
func (s *SegmentSnapshot) CountRoot() uint64 {
	if nested, ok := s.segment.(segment.NestedSegment); ok {
		return nested.CountRoot(s.deleted)
	}
	return s.Count()
}
// DocNumbers returns the doc numbers the underlying segment reports for
// docIDs, with any deleted doc numbers removed.
func (s *SegmentSnapshot) DocNumbers(docIDs []string) (*roaring.Bitmap, error) {
	docNums, err := s.segment.DocNumbers(docIDs)
	if err != nil {
		return nil, err
	}
	if s.deleted == nil {
		return docNums, nil
	}
	docNums.AndNot(s.deleted)
	return docNums, nil
}
// DocNumbersLive returns a new bitmap holding the doc numbers of all live
// docs: the range [0, segment count) minus the deleted doc numbers.
func (s *SegmentSnapshot) DocNumbersLive() *roaring.Bitmap {
	live := roaring.NewBitmap()
	live.AddRange(0, s.segment.Count())
	if s.deleted == nil {
		return live
	}
	live.AndNot(s.deleted)
	return live
}
// Fields returns the field names reported by the underlying segment.
func (s *SegmentSnapshot) Fields() []string { return s.segment.Fields() }

// Size returns the size reported by the underlying segment plus the size of
// the deleted bitmap (when present) and of the cached doc values.
func (s *SegmentSnapshot) Size() (rv int) {
	total := s.segment.Size()
	if s.deleted != nil {
		total += int(s.deleted.GetSizeInBytes())
	}
	return total + s.cachedDocs.Size()
}
// UpdateFieldsInfo merges updatedFields into the information already held by
// the snapshot and passes the merged result on to the segment when the
// segment accepts it.
//
// For a field that is already known, each flag becomes the logical OR of the
// existing and the incoming value; unknown fields are added as given. When
// the snapshot holds no information yet, updatedFields is adopted as is.
func (s *SegmentSnapshot) UpdateFieldsInfo(updatedFields map[string]*index.UpdateFieldInfo) {
	if s.updatedFields == nil {
		s.updatedFields = updatedFields
	} else {
		for name, incoming := range updatedFields {
			existing, known := s.updatedFields[name]
			if !known {
				s.updatedFields[name] = incoming
				continue
			}
			existing.Deleted = existing.Deleted || incoming.Deleted
			existing.Index = existing.Index || incoming.Index
			existing.DocValues = existing.DocValues || incoming.DocValues
			existing.Store = existing.Store || incoming.Store
		}
	}

	if updatable, ok := s.segment.(segment.UpdatableSegment); ok {
		updatable.SetUpdatedFields(s.updatedFields)
	}
}
// cachedFieldDocs holds the cached doc values of a single field.
type cachedFieldDocs struct {
	m       sync.Mutex
	readyCh chan struct{}     // closed when the cachedFieldDocs.docs is ready to be used.
	err     error             // Non-nil if there was an error when preparing this cachedFieldDocs.
	docs    map[uint64][]byte // Keyed by localDocNum, value is a list of terms delimited by 0xFF.
	size    uint64
}

// Size returns the number of bytes accounted to the cached entries: eight
// bytes for each doc number key plus the length of its term list. The
// computation runs under the cachedFieldDocs mutex.
func (cfd *cachedFieldDocs) Size() int {
	cfd.m.Lock()
	defer cfd.m.Unlock()

	total := 0
	for _, terms := range cfd.docs {
		total += 8 /* size of uint64 */ + len(terms)
	}
	return total
}
// prepareField fills cfd.docs with the terms of field for every document in
// the segment of ss: for each term in the field's dictionary, the term
// followed by index.DocValueTermSeparator is appended to the entry of every
// doc number in the term's postings.
//
// It holds cfd.m for the whole run and closes cfd.readyCh on return, whether
// it succeeded or not; a failure is recorded in cfd.err.
func (cfd *cachedFieldDocs) prepareField(field string, ss *SegmentSnapshot) {
	cfd.m.Lock()
	defer func() {
		close(cfd.readyCh)
		cfd.m.Unlock()
	}()

	cfd.size += uint64(size.SizeOfUint64) /* size field */

	dict, err := ss.segment.Dictionary(field)
	if err != nil {
		cfd.err = err
		return
	}

	// both are handed back to the segment on every iteration for reuse
	var (
		postings    segment.PostingsList
		postingsItr segment.PostingsIterator
	)

	dictItr := dict.AutomatonIterator(nil, nil, nil)
	for {
		entry, err := dictItr.Next()
		if err != nil {
			cfd.err = err
			return
		}
		if entry == nil {
			return // dictionary exhausted
		}

		postings, err = dict.PostingsList([]byte(entry.Term), nil, postings)
		if err != nil {
			cfd.err = err
			return
		}

		cfd.size += uint64(size.SizeOfUint64) /* map key */

		postingsItr = postings.Iterator(false, false, false, postingsItr)
		for {
			posting, err := postingsItr.Next()
			if err != nil {
				cfd.err = err
				return
			}
			if posting == nil {
				break // postings of this term exhausted
			}

			docNum := posting.Number()
			terms := append(cfd.docs[docNum], entry.Term...)
			cfd.docs[docNum] = append(terms, index.DocValueTermSeparator)
			cfd.size += uint64(len(entry.Term) + 1) // map value
		}
	}
}
// cachedDocs caches per-field doc values for a segment.
type cachedDocs struct {
// total size in bytes; written by updateSizeLOCKED and read by Size, both
// through sync/atomic
size uint64
m sync.RWMutex // As the cache is asynchronously prepared, need a lock
cache map[string]*cachedFieldDocs // Keyed by field
}
// prepareFields makes sure every field in wantedFields is present in the
// cache and ready to use. Fields that are not cached yet are prepared in
// their own goroutine; the call then waits for each wanted field in turn and
// returns the first preparation error it sees. On success the cached size is
// refreshed.
func (c *cachedDocs) prepareFields(wantedFields []string, ss *SegmentSnapshot) error {
	c.m.Lock()

	if c.cache == nil {
		c.cache = make(map[string]*cachedFieldDocs, len(ss.Fields()))
	}

	// start preparing every wanted field that is not cached yet
	for _, field := range wantedFields {
		if _, exists := c.cache[field]; exists {
			continue
		}
		fieldDocs := &cachedFieldDocs{
			readyCh: make(chan struct{}),
			docs:    make(map[uint64][]byte),
		}
		c.cache[field] = fieldDocs
		go fieldDocs.prepareField(field, ss)
	}

	// wait for each wanted field; the cache lock is released while blocked
	// and is NOT held when returning an error
	for _, field := range wantedFields {
		fieldDocs := c.cache[field]
		c.m.Unlock()
		<-fieldDocs.readyCh

		if fieldDocs.err != nil {
			return fieldDocs.err
		}
		c.m.Lock()
	}

	c.updateSizeLOCKED()

	c.m.Unlock()
	return nil
}
// hasFields reports whether every one of the given fields has an entry in
// the cache. The check runs under the read lock.
func (c *cachedDocs) hasFields(fields []string) bool {
	c.m.RLock()
	defer c.m.RUnlock()

	for _, field := range fields {
		if _, cached := c.cache[field]; !cached {
			return false // found a field not in cache
		}
	}
	return true
}
// Size returns the size most recently stored by updateSizeLOCKED.
func (c *cachedDocs) Size() int {
	return int(atomic.LoadUint64(&c.size))
}

// updateSizeLOCKED recomputes the cache size as the sum of the field name
// lengths and the sizes of the non-nil cachedFieldDocs, then stores it
// atomically.
// NOTE(review): the name suggests callers must hold c.m; the only caller
// visible here (prepareFields) does - confirm for other callers.
func (c *cachedDocs) updateSizeLOCKED() {
	var total int
	for name, fieldDocs := range c.cache {
		total += len(name)
		if fieldDocs == nil {
			continue
		}
		total += fieldDocs.Size()
	}
	atomic.StoreUint64(&c.size, uint64(total))
}
// visitDoc calls visitor once for every cached term of localDocNum in each of
// the given fields. Fields that are not in the cache are skipped. For a
// cached field the call blocks until that field's preparation has finished.
func (c *cachedDocs) visitDoc(localDocNum uint64,
	fields []string, visitor index.DocValueVisitor) {
	c.m.RLock()

	for _, field := range fields {
		fieldDocs, cached := c.cache[field]
		if !cached {
			continue
		}

		// do not hold the cache lock while waiting for the field
		c.m.RUnlock()
		<-fieldDocs.readyCh
		c.m.RLock()

		terms, found := fieldDocs.docs[localDocNum]
		if !found {
			continue
		}

		// terms is a sequence of separator-terminated terms
		for sep := bytes.IndexByte(terms, index.DocValueTermSeparator); sep >= 0; sep = bytes.IndexByte(terms, index.DocValueTermSeparator) {
			visitor(field, terms[:sep])
			terms = terms[sep+1:]
		}
	}

	c.m.RUnlock()
}
// cachedMeta lets its user record and cache pieces of metadata (specific to
// the segment) so that they can be reused across calls instead of being
// recomputed.
// For example, searchers created on the same index snapshot can use it to
// fetch the size of the backing index, which feeds the memory usage
// calculation that decides whether a query is allowed or not.
type cachedMeta struct {
	m    sync.RWMutex
	meta map[string]interface{}
}

// updateMeta stores val under field, creating the map on first use.
func (c *cachedMeta) updateMeta(field string, val interface{}) {
	c.m.Lock()
	defer c.m.Unlock()

	if c.meta == nil {
		c.meta = make(map[string]interface{})
	}
	c.meta[field] = val
}

// fetchMeta returns the value stored under field, or nil when there is none.
func (c *cachedMeta) fetchMeta(field string) (rv interface{}) {
	c.m.RLock()
	defer c.m.RUnlock()

	return c.meta[field]
}
// Ancestors returns the ancestor IDs of docNum appended to prealloc. A nested
// segment supplies them itself; for any other segment the result is prealloc
// plus a single ancestor ID built from docNum.
func (s *SegmentSnapshot) Ancestors(docNum uint64, prealloc []index.AncestorID) []index.AncestorID {
	if nested, ok := s.segment.(segment.NestedSegment); ok {
		return nested.Ancestors(docNum, prealloc)
	}
	return append(prealloc, index.NewAncestorID(docNum))
}
================================================
FILE: index/scorch/snapshot_vector_index.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package scorch
import (
"context"
"encoding/json"
"fmt"
"github.com/bits-and-blooms/bitset"
index "github.com/blevesearch/bleve_index_api"
segment_api "github.com/blevesearch/scorch_segment_api/v2"
)
// VectorReader returns a vector reader for the given query vector, field and
// k over this snapshot.
//
// The reader's postings and iterators slices are sized to the number of
// segments but left empty here; they are filled in later (see the note at the
// end of the function). The supplied ctx is stored on the reader so that a
// reader rebuilt from the stored values keeps working with the same context.
func (is *IndexSnapshot) VectorReader(ctx context.Context, vector []float32,
	field string, k int64, searchParams json.RawMessage,
	eligibleSelector index.EligibleDocumentSelector) (
	index.VectorReader, error) {
	rv := &IndexSnapshotVectorReader{
		vector:           vector,
		field:            field,
		k:                k,
		snapshot:         is,
		ctx:              ctx,
		searchParams:     searchParams,
		eligibleSelector: eligibleSelector,
		postings:         make([]segment_api.VecPostingsList, len(is.segment)),
		iterators:        make([]segment_api.VecPostingsIterator, len(is.segment)),
	}

	// initialize postings and iterators within the OptimizeVR's Finish()

	return rv, nil
}
// eligibleDocumentList is the list of eligible documents within a segment,
// backed by a bitset. A nil bitset means there are no eligible documents.
type eligibleDocumentList struct {
	bs *bitset.BitSet
}

// Iterator returns an iterator over the eligible document IDs; the shared
// empty iterator is returned when there are none.
func (edl *eligibleDocumentList) Iterator() index.EligibleDocumentIterator {
	if edl.bs != nil {
		return &eligibleDocumentIterator{bs: edl.bs}
	}
	return emptyEligibleIterator
}

// Count returns the number of eligible document IDs.
func (edl *eligibleDocumentList) Count() uint64 {
	if edl.bs != nil {
		return uint64(edl.bs.Count())
	}
	return 0
}

// emptyEligibleDocumentList is a reusable list without any eligible document.
var emptyEligibleDocumentList = &eligibleDocumentList{}
// eligibleDocumentIterator walks the set bits of a segment's bitset of
// eligible document IDs.
type eligibleDocumentIterator struct {
	bs      *bitset.BitSet
	current uint // position from which the next set bit is searched
}

// Next returns the next eligible document ID and true, or (0, false) once no
// set bit is left.
func (it *eligibleDocumentIterator) Next() (id uint64, ok bool) {
	pos, found := it.bs.NextSet(it.current)
	if found {
		it.current = pos + 1
		return uint64(pos), true
	}
	return 0, false
}
// emptyEligibleIterator is a shared iterator that never yields a document.
var emptyEligibleIterator = new(emptyEligibleDocumentIterator)

// emptyEligibleDocumentIterator is an iterator over no documents at all.
type emptyEligibleDocumentIterator struct{}

// Next always reports that there is no further document.
func (it *emptyEligibleDocumentIterator) Next() (id uint64, ok bool) {
	return
}
// eligibleDocumentSelector records, per segment, which documents are eligible
// for the KNN search according to a pre-filter query.
type eligibleDocumentSelector struct {
	// indexed by segment ID; each bitset holds segment-local doc numbers
	eligibleDocNums []*bitset.BitSet
	is              *IndexSnapshot
}

// SegmentEligibleDocuments returns the eligible documents of the segment with
// the given ID. The shared empty list is returned when the ID is out of range
// or nothing was recorded for that segment.
func (eds *eligibleDocumentSelector) SegmentEligibleDocuments(segmentID int) index.EligibleDocumentList {
	// a nil slice has length zero, so this also covers the unset case
	if segmentID < 0 || segmentID >= len(eds.eligibleDocNums) {
		return emptyEligibleDocumentList
	}
	if bs := eds.eligibleDocNums[segmentID]; bs != nil {
		return &eligibleDocumentList{bs: bs}
	}
	// no eligible documents for this segment
	return emptyEligibleDocumentList
}
// AddEligibleDocumentMatch marks the document with the given internal ID as
// eligible. It fails when the selector has no IndexSnapshot or when the ID
// cannot be resolved to a segment and local doc number.
func (eds *eligibleDocumentSelector) AddEligibleDocumentMatch(id index.IndexInternalID) error {
	if eds.is == nil {
		return fmt.Errorf("eligibleDocumentSelector is not initialized with IndexSnapshot")
	}

	// resolve the segment and the doc number local to it
	segIdx, docNum, err := eds.is.segmentIndexAndLocalDocNum(id)
	if err != nil {
		return err
	}

	bits := eds.eligibleDocNums[segIdx]
	if bits == nil {
		// first match in this segment: size the bitset to the full size of
		// the segment (which is the max local doc num + 1)
		bits = bitset.New(uint(eds.is.segment[segIdx].FullSize()))
		eds.eligibleDocNums[segIdx] = bits
	}

	bits.Set(uint(docNum))
	return nil
}
// NewEligibleDocumentSelector returns a selector bound to this snapshot with
// one (initially empty) slot per segment.
func (is *IndexSnapshot) NewEligibleDocumentSelector() index.EligibleDocumentSelector {
	perSegment := make([]*bitset.BitSet, len(is.segment))
	return &eligibleDocumentSelector{eligibleDocNums: perSegment, is: is}
}
================================================
FILE: index/scorch/stats.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"reflect"
"sync/atomic"
"github.com/blevesearch/bleve/v2/util"
)
// Stats tracks statistics about the index. Fields prefixed with Cur are
// gauges (they can go up and down); fields prefixed with Tot are
// monotonically increasing counters.
//
// Every field must be a uint64: ToMap reads each one through sync/atomic.
type Stats struct {
	TotUpdates                         uint64
	TotDeletes                         uint64
	TotBatches                         uint64
	TotBatchesEmpty                    uint64
	TotBatchIntroTime                  uint64
	MaxBatchIntroTime                  uint64
	CurRootEpoch                       uint64
	LastPersistedEpoch                 uint64
	LastMergedEpoch                    uint64
	TotOnErrors                        uint64
	TotAnalysisTime                    uint64
	TotIndexTime                       uint64
	TotIndexedPlainTextBytes           uint64
	TotBytesReadAtQueryTime            uint64
	TotBytesWrittenAtIndexTime         uint64
	TotTermSearchersStarted            uint64
	TotTermSearchersFinished           uint64
	TotKNNSearches                     uint64
	TotSynonymSearches                 uint64
	TotEventTriggerStarted             uint64
	TotEventTriggerCompleted           uint64
	TotIntroduceLoop                   uint64
	TotIntroduceSegmentBeg             uint64
	TotIntroduceSegmentEnd             uint64
	TotIntroducePersistBeg             uint64
	TotIntroducePersistEnd             uint64
	TotIntroduceMergeBeg               uint64
	TotIntroduceMergeEnd               uint64
	TotIntroduceRevertBeg              uint64
	TotIntroduceRevertEnd              uint64
	TotIntroducedItems                 uint64
	TotIntroducedSegmentsBatch         uint64
	TotIntroducedSegmentsMerge         uint64
	TotPersistLoopBeg                  uint64
	TotPersistLoopErr                  uint64
	TotPersistLoopProgress             uint64
	TotPersistLoopWait                 uint64
	TotPersistLoopWaitNotified         uint64
	TotPersistLoopEnd                  uint64
	TotPersistedItems                  uint64
	TotItemsToPersist                  uint64
	TotPersistedSegments               uint64
	TotMutationsFiltered               uint64
	TotPersisterSlowMergerPause        uint64
	TotPersisterSlowMergerResume       uint64
	TotPersisterNapPauseCompleted      uint64
	TotPersisterMergerNapBreak         uint64
	TotFileMergeLoopBeg                uint64
	TotFileMergeLoopErr                uint64
	TotFileMergeLoopEnd                uint64
	TotFileMergeForceOpsStarted        uint64
	TotFileMergeForceOpsCompleted      uint64
	TotFileMergePlan                   uint64
	TotFileMergePlanErr                uint64
	TotFileMergePlanNone               uint64
	TotFileMergePlanOk                 uint64
	TotFileMergePlanTasks              uint64
	TotFileMergePlanTasksDone          uint64
	TotFileMergePlanTasksErr           uint64
	TotFileMergePlanTasksSegments      uint64
	TotFileMergePlanTasksSegmentsEmpty uint64
	TotFileMergeSegmentsEmpty          uint64
	TotFileMergeSegments               uint64
	TotFileSegmentsAtRoot              uint64
	TotFileMergeWrittenBytes           uint64
	TotFileMergeZapBeg                 uint64
	TotFileMergeZapEnd                 uint64
	TotFileMergeZapTime                uint64
	MaxFileMergeZapTime                uint64
	TotFileMergeZapIntroductionTime    uint64
	MaxFileMergeZapIntroductionTime    uint64
	TotFileMergeIntroductions          uint64
	TotFileMergeIntroductionsDone      uint64
	TotFileMergeIntroductionsSkipped   uint64
	TotFileMergeIntroductionsObsoleted uint64
	CurFilesIneligibleForRemoval       uint64
	TotSnapshotsRemovedFromMetaStore   uint64
	TotMemMergeBeg                     uint64
	TotMemMergeErr                     uint64
	TotMemMergeDone                    uint64
	TotMemMergeZapBeg                  uint64
	TotMemMergeZapEnd                  uint64
	TotMemMergeZapTime                 uint64
	MaxMemMergeZapTime                 uint64
	TotMemMergeSegments                uint64
	TotMemorySegmentsAtRoot            uint64
}

// ToMap returns a map from field name to value with one entry per Stats
// field. Each value is read with an atomic load, so every entry is consistent
// on its own; the map as a whole is not a single atomic snapshot.
func (s *Stats) ToMap() map[string]interface{} {
	val := reflect.ValueOf(s).Elem()
	typ := val.Type()
	numFields := typ.NumField()

	out := make(map[string]interface{}, numFields)
	for idx := 0; idx < numFields; idx++ {
		fieldVal := val.Field(idx)
		if !fieldVal.CanAddr() {
			continue
		}
		counter := fieldVal.Addr().Interface().(*uint64)
		out[typ.Field(idx).Name] = atomic.LoadUint64(counter)
	}
	return out
}
// MarshalJSON implements json.Marshaler. Unlike standard JSON marshaling of
// the struct it goes through ToMap, so every field is read atomically.
func (s *Stats) MarshalJSON() ([]byte, error) {
	fields := s.ToMap()
	return util.MarshalJSON(fields)
}
================================================
FILE: index/scorch/unadorned.go
================================================
// Copyright (c) 2020 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorch
import (
"math"
"reflect"
"github.com/RoaringBitmap/roaring/v2"
segment "github.com/blevesearch/scorch_segment_api/v2"
)
// Static (shallow) sizes in bytes of the unadorned types, computed once in
// init and returned by the Size methods of those types.
var (
	reflectStaticSizeUnadornedPostingsIteratorBitmap int
	reflectStaticSizeUnadornedPostingsIterator1Hit   int
	reflectStaticSizeUnadornedPosting                int
)

func init() {
	reflectStaticSizeUnadornedPostingsIteratorBitmap = int(
		reflect.TypeOf(unadornedPostingsIteratorBitmap{}).Size())
	reflectStaticSizeUnadornedPostingsIterator1Hit = int(
		reflect.TypeOf(unadornedPostingsIterator1Hit{}).Size())
	reflectStaticSizeUnadornedPosting = int(
		reflect.TypeOf(UnadornedPosting{}).Size())
}
// unadornedPostingsIteratorBitmap is a postings iterator over the doc numbers
// of a roaring bitmap. Its postings carry only a doc number.
type unadornedPostingsIteratorBitmap struct {
	actual   roaring.IntPeekable
	actualBM *roaring.Bitmap
	next     UnadornedPosting // reused across Next() calls
}

// Next returns the next posting, or (nil, nil) when there is none.
func (i *unadornedPostingsIteratorBitmap) Next() (segment.Posting, error) {
	return i.nextAtOrAfter(0)
}

// Advance returns the next posting whose doc number is at or after docNum,
// or (nil, nil) when there is none.
func (i *unadornedPostingsIteratorBitmap) Advance(docNum uint64) (segment.Posting, error) {
	return i.nextAtOrAfter(docNum)
}

// nextAtOrAfter implements Next and Advance. The returned posting points at
// i.next, which is overwritten by the following call.
func (i *unadornedPostingsIteratorBitmap) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
	docNum, ok := i.nextDocNumAtOrAfter(atOrAfter)
	if !ok {
		return nil, nil
	}
	// replace the reused struct wholesale so nothing from the previous
	// posting survives
	i.next = UnadornedPosting{docNum: docNum}
	return &i.next, nil
}
// nextDocNumAtOrAfter returns the next doc number in the bitmap that is at or
// after atOrAfter, and whether one was found. A found doc number is consumed.
func (i *unadornedPostingsIteratorBitmap) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
	if i.actual == nil || !i.actual.HasNext() {
		return 0, false
	}
	if atOrAfter > math.MaxUint32 {
		// The bitmap iterator works on 32-bit values, so nothing can be at
		// or after this target. Converting it with uint32() would wrap
		// around and could wrongly return a small doc number. Drop the
		// iterator so the postings stay exhausted until ResetIterator or
		// ReplaceActual installs a new one.
		i.actual = nil
		return 0, false
	}
	i.actual.AdvanceIfNeeded(uint32(atOrAfter))

	if !i.actual.HasNext() {
		return 0, false // couldn't find anything
	}

	return uint64(i.actual.Next()), true
}
// Size returns the static size of the iterator struct.
func (i *unadornedPostingsIteratorBitmap) Size() int {
	return reflectStaticSizeUnadornedPostingsIteratorBitmap
}

// BytesRead always returns 0 for this iterator.
func (i *unadornedPostingsIteratorBitmap) BytesRead() uint64 { return 0 }

// BytesWritten always returns 0 for this iterator.
func (i *unadornedPostingsIteratorBitmap) BytesWritten() uint64 { return 0 }

// ResetBytesRead does nothing for this iterator.
func (i *unadornedPostingsIteratorBitmap) ResetBytesRead(uint64) {}

// ActualBitmap returns the bitmap the iterator runs over.
func (i *unadornedPostingsIteratorBitmap) ActualBitmap() *roaring.Bitmap { return i.actualBM }

// DocNum1Hit always returns (0, false) for the bitmap-backed iterator.
func (i *unadornedPostingsIteratorBitmap) DocNum1Hit() (uint64, bool) { return 0, false }
// ReplaceActual makes the iterator run over actual, starting from a fresh
// iterator on that bitmap.
func (i *unadornedPostingsIteratorBitmap) ReplaceActual(actual *roaring.Bitmap) {
	i.actual, i.actualBM = actual.Iterator(), actual
}

// ResetIterator rewinds to the beginning of the postings list by taking a
// fresh iterator on the current bitmap.
func (i *unadornedPostingsIteratorBitmap) ResetIterator() {
	fresh := i.actualBM.Iterator()
	i.actual = fresh
}
// newUnadornedPostingsIteratorFromBitmap returns a postings iterator over the
// doc numbers contained in bm.
func newUnadornedPostingsIteratorFromBitmap(bm *roaring.Bitmap) segment.PostingsIterator {
	itr := &unadornedPostingsIteratorBitmap{actualBM: bm}
	itr.actual = bm.Iterator()
	return itr
}
// docNum1HitFinished is the sentinel stored in
// unadornedPostingsIterator1Hit.docNum once the single hit has been consumed.
const docNum1HitFinished = math.MaxUint64

// unadornedPostingsIterator1Hit is a postings iterator over exactly one doc
// number. Its posting carries only that doc number.
type unadornedPostingsIterator1Hit struct {
	docNumOrig uint64           // original 1-hit docNum used to create this iterator
	docNum     uint64           // current docNum
	next       UnadornedPosting // reused across Next() calls
}

// Next returns the single posting if it has not been consumed yet, otherwise
// (nil, nil).
func (i *unadornedPostingsIterator1Hit) Next() (segment.Posting, error) {
	return i.nextAtOrAfter(0)
}

// Advance returns the single posting if it has not been consumed and its doc
// number is at or after docNum, otherwise (nil, nil).
func (i *unadornedPostingsIterator1Hit) Advance(docNum uint64) (segment.Posting, error) {
	return i.nextAtOrAfter(docNum)
}

// nextAtOrAfter implements Next and Advance. The returned posting points at
// i.next, which is overwritten by the following call.
func (i *unadornedPostingsIterator1Hit) nextAtOrAfter(atOrAfter uint64) (segment.Posting, error) {
	docNum, ok := i.nextDocNumAtOrAfter(atOrAfter)
	if !ok {
		return nil, nil
	}
	// replace the reused struct wholesale so nothing from the previous
	// posting survives
	i.next = UnadornedPosting{docNum: docNum}
	return &i.next, nil
}
// nextDocNumAtOrAfter returns the 1-hit doc number and true when it has not
// been consumed yet and is at or after atOrAfter. Either way an unconsumed
// hit is consumed by the call, so every later call returns (0, false) until
// the iterator is reset.
func (i *unadornedPostingsIterator1Hit) nextDocNumAtOrAfter(atOrAfter uint64) (uint64, bool) {
	hit := i.docNum
	if hit == docNum1HitFinished {
		return 0, false
	}

	// consume our 1-hit docNum, whether or not it satisfies the target
	i.docNum = docNum1HitFinished

	if hit < atOrAfter {
		// advanced past our 1-hit
		return 0, false
	}
	return hit, true
}
// Size returns the static size of the iterator struct.
func (i *unadornedPostingsIterator1Hit) Size() int {
	return reflectStaticSizeUnadornedPostingsIterator1Hit
}

// BytesRead always returns 0 for this iterator.
func (i *unadornedPostingsIterator1Hit) BytesRead() uint64 { return 0 }

// BytesWritten always returns 0 for this iterator.
func (i *unadornedPostingsIterator1Hit) BytesWritten() uint64 { return 0 }

// ResetBytesRead does nothing for this iterator.
func (i *unadornedPostingsIterator1Hit) ResetBytesRead(uint64) {}

// ResetIterator restores the iterator to its original state, making the
// 1-hit doc number available again.
func (i *unadornedPostingsIterator1Hit) ResetIterator() { i.docNum = i.docNumOrig }
// newUnadornedPostingsIteratorFrom1Hit returns a postings iterator that
// yields the single doc number docNum1Hit.
func newUnadornedPostingsIteratorFrom1Hit(docNum1Hit uint64) segment.PostingsIterator {
	itr := new(unadornedPostingsIterator1Hit)
	itr.docNumOrig, itr.docNum = docNum1Hit, docNum1Hit
	return itr
}
// ResetablePostingsIterator is implemented by postings iterators that can be
// rewound, such as the two unadorned iterators in this file.
type ResetablePostingsIterator interface {
// ResetIterator puts the iterator back at the start of its postings.
ResetIterator()
}
// UnadornedPosting is a posting that carries nothing but a doc number.
type UnadornedPosting struct {
	docNum uint64
}

// Number returns the doc number of the posting.
func (p *UnadornedPosting) Number() uint64 { return p.docNum }

// Frequency always returns 0: no frequency is recorded.
func (p *UnadornedPosting) Frequency() uint64 { return 0 }

// Norm always returns 0: no norm is recorded.
func (p *UnadornedPosting) Norm() float64 { return 0 }

// Locations always returns nil: no locations are recorded.
func (p *UnadornedPosting) Locations() []segment.Location { return nil }

// Size returns the static size of the posting struct.
func (p *UnadornedPosting) Size() int { return reflectStaticSizeUnadornedPosting }
================================================
FILE: index/upsidedown/analysis.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
index "github.com/blevesearch/bleve_index_api"
)
// IndexRow is a single key/value row produced for the index.
// NOTE(review): KeyTo/ValueTo presumably encode into the supplied buffer and
// return the number of bytes written - confirm against the implementations.
type IndexRow interface {
KeySize() int
KeyTo([]byte) (int, error)
Key() []byte
ValueSize() int
ValueTo([]byte) (int, error)
Value() []byte
}
// AnalysisResult holds the rows produced by analyzing one document.
type AnalysisResult struct {
// ID of the analyzed document
DocID string
// rows generated for the document, ending with its back index row
Rows []IndexRow
}
// Analyze analyzes document d and returns the index rows generated for it.
// It is the exported entry point to analyze.
func (udc *UpsideDownCouch) Analyze(d index.Document) *AnalysisResult {
	result := udc.analyze(d)
	return result
}
// analyze builds the index rows for document d. The result contains, in
// order: a field row for every field name seen for the first time, the rows
// for stored fields, the rows produced by indexField for every indexed field
// and finally the document's back index row.
//
// Fields sharing a name (arrays) are collated first, so that each unique
// indexed field is encoded exactly once with its merged token frequencies and
// summed length.
func (udc *UpsideDownCouch) analyze(d index.Document) *AnalysisResult {
rv := &AnalysisResult{
DocID: d.ID(),
Rows: make([]IndexRow, 0, 100),
}
docIDBytes := []byte(d.ID())
// track our back index entries
backIndexStoredEntries := make([]*BackIndexStoreEntry, 0)
// information we collate as we merge fields with same name
fieldTermFreqs := make(map[uint16]index.TokenFrequencies)
fieldLengths := make(map[uint16]int)
fieldIncludeTermVectors := make(map[uint16]bool)
fieldNames := make(map[uint16]string)
// analyzeField records one field: it registers the field name, collates
// the analysis output of indexed fields and, when storable is true, emits
// the rows for stored fields
analyzeField := func(field index.Field, storable bool) {
fieldIndex, newFieldRow := udc.fieldIndexOrNewRow(field.Name())
if newFieldRow != nil {
rv.Rows = append(rv.Rows, newFieldRow)
}
fieldNames[fieldIndex] = field.Name()
if field.Options().IsIndexed() {
field.Analyze()
fieldLength := field.AnalyzedLength()
tokenFreqs := field.AnalyzedTokenFrequencies()
existingFreqs := fieldTermFreqs[fieldIndex]
if existingFreqs == nil {
fieldTermFreqs[fieldIndex] = tokenFreqs
} else {
existingFreqs.MergeAll(field.Name(), tokenFreqs)
fieldTermFreqs[fieldIndex] = existingFreqs
}
fieldLengths[fieldIndex] += fieldLength
// the most recently analyzed field with this index decides
fieldIncludeTermVectors[fieldIndex] = field.Options().IncludeTermVectors()
}
if storable && field.Options().IsStored() {
rv.Rows, backIndexStoredEntries = udc.storeField(docIDBytes, field, fieldIndex, rv.Rows, backIndexStoredEntries)
}
}
// walk all the fields, record stored fields now
// place information about indexed fields into map
// this collates information across fields with
// same names (arrays)
d.VisitFields(func(field index.Field) {
analyzeField(field, true)
})
if d.HasComposite() {
for fieldIndex, tokenFreqs := range fieldTermFreqs {
// see if any of the composite fields need this
d.VisitComposite(func(field index.CompositeField) {
field.Compose(fieldNames[fieldIndex], fieldLengths[fieldIndex], tokenFreqs)
})
}
// composite fields are analyzed like any other field, but never stored
d.VisitComposite(func(field index.CompositeField) {
analyzeField(field, false)
})
}
// re-allocate the rows once with room for the rows gathered so far, one
// entry per collated token frequency and the back index row
rowsCapNeeded := len(rv.Rows) + 1
for _, tokenFreqs := range fieldTermFreqs {
rowsCapNeeded += len(tokenFreqs)
}
rv.Rows = append(make([]IndexRow, 0, rowsCapNeeded), rv.Rows...)
backIndexTermsEntries := make([]*BackIndexTermsEntry, 0, len(fieldTermFreqs))
// walk through the collated information and process
// once for each indexed field (unique name)
for fieldIndex, tokenFreqs := range fieldTermFreqs {
fieldLength := fieldLengths[fieldIndex]
includeTermVectors := fieldIncludeTermVectors[fieldIndex]
// encode this field
rv.Rows, backIndexTermsEntries = udc.indexField(docIDBytes, includeTermVectors, fieldIndex, fieldLength, tokenFreqs, rv.Rows, backIndexTermsEntries)
}
// build the back index row
backIndexRow := NewBackIndexRow(docIDBytes, backIndexTermsEntries, backIndexStoredEntries)
rv.Rows = append(rv.Rows, backIndexRow)
return rv
}
================================================
FILE: index/upsidedown/analysis_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"testing"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/null"
"github.com/blevesearch/bleve/v2/registry"
index "github.com/blevesearch/bleve_index_api"
)
func TestAnalysisBug328(t *testing.T) {
cache := registry.NewCache()
analyzer, err := cache.AnalyzerNamed(standard.Name)
if err != nil {
t.Fatal(err)
}
analysisQueue := index.NewAnalysisQueue(1)
idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
if err != nil {
t.Fatal(err)
}
d := document.NewDocument("1")
f := document.NewTextFieldCustom("title", nil, []byte("bleve"), index.IndexField|index.IncludeTermVectors, analyzer)
d.AddField(f)
f = document.NewTextFieldCustom("body", nil, []byte("bleve"), index.IndexField|index.IncludeTermVectors, analyzer)
d.AddField(f)
cf := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, []string{}, index.IndexField|index.IncludeTermVectors)
d.AddField(cf)
rv := idx.(*UpsideDownCouch).analyze(d)
fieldIndexes := make(map[uint16]string)
for _, row := range rv.Rows {
if row, ok := row.(*FieldRow); ok {
fieldIndexes[row.index] = row.name
}
if row, ok := row.(*TermFrequencyRow); ok && string(row.term) == "bleve" {
for _, vec := range row.vectors {
if vec.field != row.field {
if fieldIndexes[row.field] != "_all" {
t.Errorf("row named %s field %d - vector field %d", fieldIndexes[row.field], row.field, vec.field)
}
}
}
}
}
}
// BenchmarkAnalyze measures the cost of analyzing a single document whose
// only field holds the bleveWikiArticle1K text, using the standard analyzer
// and the null KV store. Each iteration also sanity-checks that the analysis
// produced the expected number of rows (92 or 93).
func BenchmarkAnalyze(b *testing.B) {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(standard.Name)
	if err != nil {
		b.Fatal(err)
	}

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
	if err != nil {
		b.Fatal(err)
	}

	d := document.NewDocument("1")
	f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer)
	d.AddField(f)

	// resolve the concrete index type once, outside the timed loop
	udc := idx.(*UpsideDownCouch)

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		rv := udc.analyze(d)
		if n := len(rv.Rows); n < 92 || n > 93 {
			// the message now reports the same bounds the condition enforces
			b.Fatalf("expected 92-93 rows, got %d", n)
		}
	}
}
// bleveWikiArticle1K is a fixed excerpt of Wikipedia text (about 1K, going by
// the name) that BenchmarkAnalyze uses as the document body.
// NOTE(review): BenchmarkAnalyze asserts on the number of rows produced from
// this text, so editing it will likely require updating that check.
var bleveWikiArticle1K = []byte(`Boiling liquid expanding vapor explosion
From Wikipedia, the free encyclopedia
See also: Boiler explosion and Steam explosion
Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.
This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
Contents [hide]
1 Mechanism
1.1 Water example
1.2 BLEVEs without chemical reactions
2 Fires
3 Incidents
4 Safety measures
5 See also
6 References
7 External links
Mechanism[edit]
This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:`)
================================================
FILE: index/upsidedown/benchmark_all.sh
================================================
#!/bin/sh
# Runs every Go benchmark declared in the *_test.go files of the current
# directory, one benchmark per `go test` invocation.
#
# Collect the benchmark function names: grep selects the "func Benchmark..."
# lines, the first sed drops everything up to and including "func ", and the
# second sed drops everything from the opening parenthesis onward.
BENCHMARKS=`grep "func Benchmark" *_test.go | sed 's/.*func //' | sed s/\(.*{//`
for BENCHMARK in $BENCHMARKS
do
# -bench is anchored with ^...$ so only the named benchmark is selected;
# -run=xxx presumably matches no regular test, leaving just the benchmark.
# Lines containing "ok" or "PASS" are filtered out of the output.
# NOTE(review): the 'forestdb leveldb' build tags may require those stores to
# be available in the build environment - confirm before relying on this.
go test -v -run=xxx -bench=^$BENCHMARK$ -benchtime=10s -tags 'forestdb leveldb' | grep -v ok | grep -v PASS
done
================================================
FILE: index/upsidedown/benchmark_boltdb_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"testing"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
)
// boltTestConfig is the store configuration passed to the boltdb-backed tests
// and benchmarks in this package; it points the store at the relative path
// "test", the same path that DestroyTest removes.
var boltTestConfig = map[string]interface{}{
"path": "test",
}
// BenchmarkBoltDBIndexing1Workers benchmarks single-document indexing on the
// boltdb store with 1 analysis worker.
func BenchmarkBoltDBIndexing1Workers(b *testing.B) {
CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 1)
}
// BenchmarkBoltDBIndexing2Workers benchmarks single-document indexing on the
// boltdb store with 2 analysis workers.
func BenchmarkBoltDBIndexing2Workers(b *testing.B) {
CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 2)
}
// BenchmarkBoltDBIndexing4Workers benchmarks single-document indexing on the
// boltdb store with 4 analysis workers.
func BenchmarkBoltDBIndexing4Workers(b *testing.B) {
CommonBenchmarkIndex(b, boltdb.Name, boltTestConfig, DestroyTest, 4)
}
// batches
// BenchmarkBoltDBIndexing1Workers10Batch benchmarks batched indexing on the
// boltdb store with 1 analysis worker and a batch size of 10.
func BenchmarkBoltDBIndexing1Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 10)
}
// BenchmarkBoltDBIndexing2Workers10Batch benchmarks batched indexing on the
// boltdb store with 2 analysis workers and a batch size of 10.
func BenchmarkBoltDBIndexing2Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 10)
}
// BenchmarkBoltDBIndexing4Workers10Batch benchmarks batched indexing on the
// boltdb store with 4 analysis workers and a batch size of 10.
func BenchmarkBoltDBIndexing4Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 10)
}
// BenchmarkBoltDBIndexing1Workers100Batch benchmarks batched indexing on the
// boltdb store with 1 analysis worker and a batch size of 100.
func BenchmarkBoltDBIndexing1Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 100)
}
// BenchmarkBoltDBIndexing2Workers100Batch benchmarks batched indexing on the
// boltdb store with 2 analysis workers and a batch size of 100.
func BenchmarkBoltDBIndexing2Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 100)
}
// BenchmarkBoltDBIndexing4Workers100Batch benchmarks batched indexing on the
// boltdb store with 4 analysis workers and a batch size of 100.
func BenchmarkBoltDBIndexing4Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 100)
}
// BenchmarkBoltBIndexing1Workers1000Batch benchmarks batched indexing on the
// boltdb store with 1 analysis worker and a batch size of 1000.
// NOTE(review): this and the next two benchmarks are spelled "BoltB" rather
// than "BoltDB" like their siblings - presumably a typo; renaming would change
// the names used with -bench.
func BenchmarkBoltBIndexing1Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 1, 1000)
}
// BenchmarkBoltBIndexing2Workers1000Batch benchmarks batched indexing on the
// boltdb store with 2 analysis workers and a batch size of 1000.
func BenchmarkBoltBIndexing2Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 2, 1000)
}
// BenchmarkBoltBIndexing4Workers1000Batch benchmarks batched indexing on the
// boltdb store with 4 analysis workers and a batch size of 1000.
func BenchmarkBoltBIndexing4Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, boltdb.Name, boltTestConfig, DestroyTest, 4, 1000)
}
================================================
FILE: index/upsidedown/benchmark_common_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"os"
"strconv"
"testing"
_ "github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/registry"
index "github.com/blevesearch/bleve_index_api"
)
// benchmarkDocBodies holds the sample texts used as the "body" field of the
// documents indexed by the common benchmarks: CommonBenchmarkIndex indexes
// the first entry, CommonBenchmarkIndexBatch cycles through all of them.
var benchmarkDocBodies = []string{
"A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.",
"A boiler explosion is a catastrophic failure of a boiler. As seen today, boiler explosions are of two kinds. One kind is a failure of the pressure parts of the steam and water sides. There can be many different causes, such as failure of the safety valve, corrosion of critical parts of the boiler, or low water level. Corrosion along the edges of lap joints was a common cause of early boiler explosions.",
"A boiler is a closed vessel in which water or other fluid is heated. The fluid does not necessarily boil. (In North America the term \"furnace\" is normally used if the purpose is not actually to boil the fluid.) The heated or vaporized fluid exits the boiler for use in various processes or heating applications,[1][2] including central heating, boiler-based power generation, cooking, and sanitation.",
"A pressure vessel is a closed container designed to hold gases or liquids at a pressure substantially different from the ambient pressure.",
"Pressure (symbol: p or P) is the ratio of force to the area over which that force is distributed.",
"Liquid is one of the four fundamental states of matter (the others being solid, gas, and plasma), and is the only state with a definite volume but no fixed shape.",
"The boiling point of a substance is the temperature at which the vapor pressure of the liquid equals the pressure surrounding the liquid[1][2] and the liquid changes into a vapor.",
"Vapor pressure or equilibrium vapor pressure is defined as the pressure exerted by a vapor in thermodynamic equilibrium with its condensed phases (solid or liquid) at a given temperature in a closed system.",
"Industrial gases are a group of gases that are specifically manufactured for use in a wide range of industries, which include oil and gas, petrochemicals, chemicals, power, mining, steelmaking, metals, environmental protection, medicine, pharmaceuticals, biotechnology, food, water, fertilizers, nuclear power, electronics and aerospace.",
"The expansion ratio of a liquefied and cryogenic substance is the volume of a given amount of that substance in liquid form compared to the volume of the same amount of substance in gaseous form, at room temperature and normal atmospheric pressure.",
}
// KVStoreDestroy is a cleanup callback the common benchmarks invoke after
// closing an index; a non-nil error aborts the benchmark.
type KVStoreDestroy func() error
// DestroyTest removes the "test" path (and anything beneath it) relative to
// the current working directory, returning any error from os.RemoveAll.
func DestroyTest() error {
return os.RemoveAll("test")
}
// CommonBenchmarkIndex benchmarks indexing one document at a time.
//
// For each of the b.N iterations it builds a fresh index on the named store
// with analysisWorkers analysis workers, opens it, indexes a single document
// (the first entry of benchmarkDocBodies, analyzed with the "standard"
// analyzer), then closes the index, calls destroy and closes the analysis
// queue. Only the idx.Update call runs with the benchmark timer started.
func CommonBenchmarkIndex(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers int) {
	analyzer, err := registry.NewCache().AnalyzerNamed("standard")
	if err != nil {
		b.Fatal(err)
	}

	bodyField := document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(benchmarkDocBodies[0]), analyzer)
	doc := document.NewDocument("").AddField(bodyField)

	b.ResetTimer()
	b.StopTimer()
	for iter := 0; iter < b.N; iter++ {
		queue := index.NewAnalysisQueue(analysisWorkers)
		idx, err := NewUpsideDownCouch(storeName, storeConfig, queue)
		if err != nil {
			b.Fatal(err)
		}
		if err := idx.Open(); err != nil {
			b.Fatal(err)
		}

		doc.SetID(strconv.Itoa(iter))

		// just time the indexing portion
		b.StartTimer()
		if err := idx.Update(doc); err != nil {
			b.Fatal(err)
		}
		b.StopTimer()

		// untimed teardown
		if err := idx.Close(); err != nil {
			b.Fatal(err)
		}
		if err := destroy(); err != nil {
			b.Fatal(err)
		}
		queue.Close()
	}
}
// CommonBenchmarkIndexBatch benchmarks indexing 1000 documents per iteration
// in batches of batchSize.
//
// For each of the b.N iterations it builds a fresh index on the named store
// with analysisWorkers analysis workers, opens it, and then - with the timer
// running - creates 1000 documents whose bodies cycle through
// benchmarkDocBodies, submitting a batch every batchSize documents and once
// more for any remainder. Closing the index, calling destroy and closing the
// analysis queue happen with the timer stopped.
//
// batchSize must be non-zero; it is used as a modulus.
func CommonBenchmarkIndexBatch(b *testing.B, storeName string, storeConfig map[string]interface{}, destroy KVStoreDestroy, analysisWorkers, batchSize int) {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed("standard")
	if err != nil {
		b.Fatal(err)
	}

	b.ResetTimer()
	b.StopTimer()
	for i := 0; i < b.N; i++ {
		analysisQueue := index.NewAnalysisQueue(analysisWorkers)
		idx, err := NewUpsideDownCouch(storeName, storeConfig, analysisQueue)
		if err != nil {
			b.Fatal(err)
		}

		err = idx.Open()
		if err != nil {
			b.Fatal(err)
		}

		b.StartTimer()
		batch := index.NewBatch()
		for j := 0; j < 1000; j++ {
			// at every batch boundary, flush what has accumulated and
			// start a new batch
			if j%batchSize == 0 {
				if len(batch.IndexOps) > 0 {
					err := idx.Batch(batch)
					if err != nil {
						b.Fatal(err)
					}
				}
				batch = index.NewBatch()
			}
			// cycle through the sample bodies; using the slice length keeps
			// this correct if entries are ever added or removed
			body := benchmarkDocBodies[j%len(benchmarkDocBodies)]
			indexDocument := document.NewDocument("").
				AddField(document.NewTextFieldWithAnalyzer("body", []uint64{}, []byte(body), analyzer))
			indexDocument.SetID(strconv.Itoa(i) + "-" + strconv.Itoa(j))
			batch.Update(indexDocument)
		}
		// close last batch
		if len(batch.IndexOps) > 0 {
			err := idx.Batch(batch)
			if err != nil {
				b.Fatal(err)
			}
		}
		b.StopTimer()

		err = idx.Close()
		if err != nil {
			b.Fatal(err)
		}
		err = destroy()
		if err != nil {
			b.Fatal(err)
		}
		analysisQueue.Close()
	}
}
================================================
FILE: index/upsidedown/benchmark_gtreap_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"testing"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
)
// BenchmarkGTreapIndexing1Workers benchmarks single-document indexing on the
// gtreap store (nil config) with 1 analysis worker.
func BenchmarkGTreapIndexing1Workers(b *testing.B) {
CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 1)
}
// BenchmarkGTreapIndexing2Workers benchmarks single-document indexing on the
// gtreap store with 2 analysis workers.
func BenchmarkGTreapIndexing2Workers(b *testing.B) {
CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 2)
}
// BenchmarkGTreapIndexing4Workers benchmarks single-document indexing on the
// gtreap store with 4 analysis workers.
func BenchmarkGTreapIndexing4Workers(b *testing.B) {
CommonBenchmarkIndex(b, gtreap.Name, nil, DestroyTest, 4)
}
// batches
// BenchmarkGTreapIndexing1Workers10Batch benchmarks batched indexing on the
// gtreap store with 1 analysis worker and a batch size of 10.
func BenchmarkGTreapIndexing1Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 10)
}
// BenchmarkGTreapIndexing2Workers10Batch benchmarks batched indexing on the
// gtreap store with 2 analysis workers and a batch size of 10.
func BenchmarkGTreapIndexing2Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 10)
}
// BenchmarkGTreapIndexing4Workers10Batch benchmarks batched indexing on the
// gtreap store with 4 analysis workers and a batch size of 10.
func BenchmarkGTreapIndexing4Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 10)
}
// BenchmarkGTreapIndexing1Workers100Batch benchmarks batched indexing on the
// gtreap store with 1 analysis worker and a batch size of 100.
func BenchmarkGTreapIndexing1Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 100)
}
// BenchmarkGTreapIndexing2Workers100Batch benchmarks batched indexing on the
// gtreap store with 2 analysis workers and a batch size of 100.
func BenchmarkGTreapIndexing2Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 100)
}
// BenchmarkGTreapIndexing4Workers100Batch benchmarks batched indexing on the
// gtreap store with 4 analysis workers and a batch size of 100.
func BenchmarkGTreapIndexing4Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 100)
}
// BenchmarkGTreapIndexing1Workers1000Batch benchmarks batched indexing on the
// gtreap store with 1 analysis worker and a batch size of 1000.
func BenchmarkGTreapIndexing1Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 1, 1000)
}
// BenchmarkGTreapIndexing2Workers1000Batch benchmarks batched indexing on the
// gtreap store with 2 analysis workers and a batch size of 1000.
func BenchmarkGTreapIndexing2Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 2, 1000)
}
// BenchmarkGTreapIndexing4Workers1000Batch benchmarks batched indexing on the
// gtreap store with 4 analysis workers and a batch size of 1000.
func BenchmarkGTreapIndexing4Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, gtreap.Name, nil, DestroyTest, 4, 1000)
}
================================================
FILE: index/upsidedown/benchmark_null_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"testing"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/null"
)
// BenchmarkNullIndexing1Workers benchmarks single-document indexing on the
// null store (nil config) with 1 analysis worker.
func BenchmarkNullIndexing1Workers(b *testing.B) {
CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 1)
}
// BenchmarkNullIndexing2Workers benchmarks single-document indexing on the
// null store with 2 analysis workers.
func BenchmarkNullIndexing2Workers(b *testing.B) {
CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 2)
}
// BenchmarkNullIndexing4Workers benchmarks single-document indexing on the
// null store with 4 analysis workers.
func BenchmarkNullIndexing4Workers(b *testing.B) {
CommonBenchmarkIndex(b, null.Name, nil, DestroyTest, 4)
}
// batches
// BenchmarkNullIndexing1Workers10Batch benchmarks batched indexing on the
// null store with 1 analysis worker and a batch size of 10.
func BenchmarkNullIndexing1Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 10)
}
// BenchmarkNullIndexing2Workers10Batch benchmarks batched indexing on the
// null store with 2 analysis workers and a batch size of 10.
func BenchmarkNullIndexing2Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 10)
}
// BenchmarkNullIndexing4Workers10Batch benchmarks batched indexing on the
// null store with 4 analysis workers and a batch size of 10.
func BenchmarkNullIndexing4Workers10Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 10)
}
// BenchmarkNullIndexing1Workers100Batch benchmarks batched indexing on the
// null store with 1 analysis worker and a batch size of 100.
func BenchmarkNullIndexing1Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 100)
}
// BenchmarkNullIndexing2Workers100Batch benchmarks batched indexing on the
// null store with 2 analysis workers and a batch size of 100.
func BenchmarkNullIndexing2Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 100)
}
// BenchmarkNullIndexing4Workers100Batch benchmarks batched indexing on the
// null store with 4 analysis workers and a batch size of 100.
func BenchmarkNullIndexing4Workers100Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 100)
}
// BenchmarkNullIndexing1Workers1000Batch benchmarks batched indexing on the
// null store with 1 analysis worker and a batch size of 1000.
func BenchmarkNullIndexing1Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 1, 1000)
}
// BenchmarkNullIndexing2Workers1000Batch benchmarks batched indexing on the
// null store with 2 analysis workers and a batch size of 1000.
func BenchmarkNullIndexing2Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 2, 1000)
}
// BenchmarkNullIndexing4Workers1000Batch benchmarks batched indexing on the
// null store with 4 analysis workers and a batch size of 1000.
func BenchmarkNullIndexing4Workers1000Batch(b *testing.B) {
CommonBenchmarkIndexBatch(b, null.Name, nil, DestroyTest, 4, 1000)
}
================================================
FILE: index/upsidedown/dump.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"sort"
"github.com/blevesearch/upsidedown_store_api"
)
// the functions in this file are only intended to be used by
// the bleve_dump utility and the debug http handlers
// if your application relies on them, you're doing something wrong
// they may change or be removed at any time
// dumpPrefix walks a prefix iterator over kvreader and sends each parsed row
// to rv. A nil prefix is replaced by the single byte 0x00. If a key/value
// pair cannot be parsed, the error is sent to rv and iteration stops. An
// error from closing the iterator is also sent to rv, as the last value.
func dumpPrefix(kvreader store.KVReader, rv chan interface{}, prefix []byte) {
	if prefix == nil {
		prefix = []byte{0}
	}

	iter := kvreader.PrefixIterator(prefix)
	defer func() {
		if cerr := iter.Close(); cerr != nil {
			rv <- cerr
		}
	}()

	for k, v, ok := iter.Current(); ok; k, v, ok = iter.Current() {
		// parse private copies of the key and value (presumably because the
		// iterator may reuse its buffers once advanced)
		keyCopy := append(make([]byte, 0, len(k)), k...)
		valCopy := append(make([]byte, 0, len(v)), v...)

		row, err := ParseFromKeyValue(keyCopy, valCopy)
		if err != nil {
			rv <- err
			return
		}
		rv <- row

		iter.Next()
	}
}
// dumpRange walks a range iterator over kvreader from start to end and sends
// each parsed row to rv. If a key/value pair cannot be parsed, the error is
// sent to rv and iteration stops. An error from closing the iterator is also
// sent to rv, as the last value.
func dumpRange(kvreader store.KVReader, rv chan interface{}, start, end []byte) {
	iter := kvreader.RangeIterator(start, end)
	defer func() {
		if cerr := iter.Close(); cerr != nil {
			rv <- cerr
		}
	}()

	for {
		k, v, ok := iter.Current()
		if !ok {
			return
		}

		// parse private copies of the key and value (presumably because the
		// iterator may reuse its buffers once advanced)
		keyCopy := make([]byte, len(k))
		copy(keyCopy, k)
		valCopy := make([]byte, len(v))
		copy(valCopy, v)

		row, err := ParseFromKeyValue(keyCopy, valCopy)
		if err != nil {
			rv <- err
			return
		}
		rv <- row

		iter.Next()
	}
}
// DumpAll returns an unbuffered channel on which a background goroutine sends
// the rows produced by dumpRange with nil start and end bounds (presumably
// the whole key space). Values on the channel are parsed rows or errors. The
// channel is closed when the dump finishes.
// NOTE(review): because the channel is unbuffered, the goroutine blocks until
// the caller receives each value; callers should drain the channel.
func (i *IndexReader) DumpAll() chan interface{} {
rv := make(chan interface{})
go func() {
defer close(rv)
dumpRange(i.kvreader, rv, nil, nil)
}()
return rv
}
// DumpFields returns an unbuffered channel on which a background goroutine
// sends every row whose key starts with the byte 'f' (presumably the field
// rows). Values on the channel are parsed rows or errors. The channel is
// closed when the dump finishes.
func (i *IndexReader) DumpFields() chan interface{} {
rv := make(chan interface{})
go func() {
defer close(rv)
dumpPrefix(i.kvreader, rv, []byte{'f'})
}()
return rv
}
// keyset is a list of raw keys that sorts in ascending bytewise order; its
// Len, Swap and Less methods satisfy sort.Interface.
type keyset [][]byte

// Len reports the number of keys.
func (k keyset) Len() int {
	return len(k)
}

// Swap exchanges the keys at positions i and j.
func (k keyset) Swap(i, j int) {
	tmp := k[i]
	k[i] = k[j]
	k[j] = tmp
}

// Less reports whether the key at i orders strictly before the key at j.
func (k keyset) Less(i, j int) bool {
	return bytes.Compare(k[j], k[i]) > 0
}
// DumpDoc returns all rows in the index related to this doc id.
//
// The rows are produced by a background goroutine and delivered on the
// returned unbuffered channel, which is closed when the dump finishes. Values
// on the channel are parsed rows or errors. If the document has no back index
// row, the channel is closed without sending anything. Otherwise the stored
// rows for the document are sent first, followed by one row per term recorded
// in the back index, visited in sorted key order.
func (i *IndexReader) DumpDoc(id string) chan interface{} {
	idBytes := []byte(id)
	rv := make(chan interface{})

	go func() {
		defer close(rv)

		back, err := backIndexRowForDoc(i.kvreader, []byte(id))
		if err != nil {
			rv <- err
			return
		}

		// no such doc
		if back == nil {
			return
		}
		// build sorted list of term keys
		keys := make(keyset, 0)
		for _, entry := range back.termsEntries {
			for ti := range entry.Terms {
				tfr := NewTermFrequencyRow([]byte(entry.Terms[ti]), uint16(*entry.Field), idBytes, 0, 0)
				key := tfr.Key()
				keys = append(keys, key)
			}
		}
		sort.Sort(keys)

		// first add all the stored rows
		storedRowPrefix := NewStoredRow(idBytes, 0, []uint64{}, 'x', []byte{}).ScanPrefixForDoc()
		dumpPrefix(i.kvreader, rv, storedRowPrefix)

		// now walk term keys in order and add them as well
		if len(keys) > 0 {
			it := i.kvreader.RangeIterator(keys[0], nil)
			defer func() {
				cerr := it.Close()
				if cerr != nil {
					rv <- cerr
				}
			}()

			for _, key := range keys {
				it.Seek(key)
				rkey, rval, valid := it.Current()
				if !valid {
					break
				}
				// copy the key the iterator actually returned; copying the
				// sought key into a buffer sized for rkey would corrupt the
				// row whenever the two differ
				rck := make([]byte, len(rkey))
				copy(rck, rkey)
				rcv := make([]byte, len(rval))
				copy(rcv, rval)
				row, err := ParseFromKeyValue(rck, rcv)
				if err != nil {
					rv <- err
					return
				}
				rv <- row
			}
		}
	}()

	return rv
}
================================================
FILE: index/upsidedown/dump_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"testing"
"time"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/bleve/v2/document"
)
// TestDump indexes two documents into a boltdb-backed index and verifies the
// number of rows reported by DumpFields, DumpDoc (for each document) and
// DumpAll.
func TestDump(t *testing.T) {
	defer func() {
		if err := DestroyTest(); err != nil {
			t.Fatal(err)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// the index starts out empty
	var expectedCount uint64
	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	if err = reader.Close(); err != nil {
		t.Fatal(err)
	}

	// indexDoc builds a document with a text, a numeric and a date field
	// (all indexed and stored) and writes it to the index
	const indexedAndStored = index.IndexField | index.StoreField
	indexDoc := func(id, name string) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte(name), indexedAndStored))
		doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, indexedAndStored))
		dateField, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, indexedAndStored)
		if err != nil {
			t.Error(err)
		}
		doc.AddField(dateField)
		if err := idx.Update(doc); err != nil {
			t.Errorf("Error updating index: %v", err)
		}
	}
	indexDoc("1", "test")
	indexDoc("2", "test2")

	// countRows drains a dump channel and reports how many values it carried
	countRows := func(rows chan interface{}) int {
		n := 0
		for range rows {
			n++
		}
		return n
	}

	reader, err = idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	upsideDownReader, ok := reader.(*IndexReader)
	if !ok {
		t.Fatal("dump is only supported by index type upsidedown")
	}

	fieldsCount := countRows(upsideDownReader.DumpFields())
	if fieldsCount != 3 {
		t.Errorf("expected 3 fields, got %d", fieldsCount)
	}

	// 1 text term
	// 16 numeric terms
	// 16 date terms
	// 3 stored fields
	expectedDocRowCount := int(1 + (2 * (64 / document.DefaultPrecisionStep)) + 3)
	for _, docID := range []string{"1", "2"} {
		docRowCount := countRows(upsideDownReader.DumpDoc(docID))
		if docRowCount != expectedDocRowCount {
			t.Errorf("expected %d rows for document, got %d", expectedDocRowCount, docRowCount)
		}
	}

	// 1 version
	// fieldsCount field rows
	// 2 docs * expectedDocRowCount
	// 2 back index rows
	// 2 text term row count (2 different text terms)
	// 16 numeric term row counts (shared for both docs, same numeric value)
	// 16 date term row counts (shared for both docs, same date value)
	expectedAllRowCount := int(1 + fieldsCount + (2 * expectedDocRowCount) + 2 + 2 + int((2 * (64 / document.DefaultPrecisionStep))))
	allRowCount := countRows(upsideDownReader.DumpAll())
	if allRowCount != expectedAllRowCount {
		t.Errorf("expected %d rows for all, got %d", expectedAllRowCount, allRowCount)
	}

	if err = reader.Close(); err != nil {
		t.Fatal(err)
	}
}
================================================
FILE: index/upsidedown/field_cache.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"sync"
)
// FieldCache keeps a two-way mapping between field names and the uint16
// indexes assigned to them. Its exported methods guard the maps with an
// internal read/write mutex.
type FieldCache struct {
	fieldIndexes   map[string]uint16 // field name -> index
	indexFields    []string          // index -> field name ("" where unassigned)
	lastFieldIndex int               // highest index assigned so far, -1 if none
	mutex          sync.RWMutex
}

// NewFieldCache returns an empty cache in which no field index has been
// assigned yet.
func NewFieldCache() *FieldCache {
	fc := new(FieldCache)
	fc.fieldIndexes = make(map[string]uint16)
	fc.lastFieldIndex = -1
	return fc
}

// AddExisting records that field is already known under the given index.
func (f *FieldCache) AddExisting(field string, index uint16) {
	f.mutex.Lock()
	defer f.mutex.Unlock()
	f.addLOCKED(field, index)
}

// addLOCKED stores the field/index pair in both directions and returns index.
// The caller must hold the write lock.
func (f *FieldCache) addLOCKED(field string, index uint16) uint16 {
	pos := int(index)
	f.fieldIndexes[field] = index

	// grow the reverse lookup slice, leaving headroom for later indexes
	if pos >= len(f.indexFields) {
		grown := make([]string, pos+16)
		copy(grown, f.indexFields)
		f.indexFields = grown
	}
	f.indexFields[pos] = field

	if pos > f.lastFieldIndex {
		f.lastFieldIndex = pos
	}
	return index
}

// FieldNamed returns the index of the field and whether it existed before
// this call. If the field is unknown and createIfMissing is true, the next
// free index is assigned and returned, but the second return value is still
// false. If the field is unknown and createIfMissing is false, it returns
// (0, false).
func (f *FieldCache) FieldNamed(field string, createIfMissing bool) (uint16, bool) {
	f.mutex.RLock()
	index, known := f.fieldIndexes[field]
	f.mutex.RUnlock()

	if known {
		return index, true
	}
	if !createIfMissing {
		return 0, false
	}

	// trade read lock for write lock
	f.mutex.Lock()
	defer f.mutex.Unlock()

	// need to check again with write lock, another caller may have added
	// the field in between
	if index, known := f.fieldIndexes[field]; known {
		return index, true
	}

	// assign next field id
	return f.addLOCKED(field, uint16(f.lastFieldIndex+1)), false
}

// FieldIndexed returns the name of the field stored at index, or the empty
// string when the index lies beyond the reverse lookup slice.
func (f *FieldCache) FieldIndexed(index uint16) (field string) {
	f.mutex.RLock()
	defer f.mutex.RUnlock()

	if pos := int(index); pos < len(f.indexFields) {
		return f.indexFields[pos]
	}
	return ""
}
================================================
FILE: index/upsidedown/field_dict.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"fmt"
index "github.com/blevesearch/bleve_index_api"
store "github.com/blevesearch/upsidedown_store_api"
)
// UpsideDownCouchFieldDict iterates over the dictionary rows of a single
// field within a term range, backed by a KV range iterator.
type UpsideDownCouchFieldDict struct {
	indexReader *IndexReader
	iterator    store.KVIterator
	dictRow     *DictionaryRow   // scratch row, overwritten by every Next call
	dictEntry   *index.DictEntry // entry handed back by Next, overwritten each call
	field       uint16
}

// newUpsideDownCouchFieldDict opens a range iterator over the dictionary rows
// of field, starting at startTerm. When endTerm is nil the range ends at a
// term consisting of the single ByteSeparator byte; otherwise it ends at
// incrementBytes(endTerm) (presumably so that endTerm itself is included).
// The returned error is always nil.
func newUpsideDownCouchFieldDict(indexReader *IndexReader, field uint16, startTerm, endTerm []byte) (*UpsideDownCouchFieldDict, error) {
	startKey := NewDictionaryRow(startTerm, field, 0).Key()

	var upperTerm []byte
	if endTerm != nil {
		upperTerm = incrementBytes(endTerm)
	} else {
		upperTerm = []byte{ByteSeparator}
	}
	endKey := NewDictionaryRow(upperTerm, field, 0).Key()

	dict := &UpsideDownCouchFieldDict{
		indexReader: indexReader,
		iterator:    indexReader.kvreader.RangeIterator(startKey, endKey),
		dictRow:     &DictionaryRow{},   // Pre-alloced, reused row.
		dictEntry:   &index.DictEntry{}, // Pre-alloced, reused entry.
		field:       field,
	}
	return dict, nil
}

// BytesRead always reports 0 for this implementation.
func (r *UpsideDownCouchFieldDict) BytesRead() uint64 {
	return 0
}

// Next returns the dictionary entry at the iterator's current position and
// advances the iterator. It returns (nil, nil) once the iterator is
// exhausted. The returned entry is the same object on every call, so callers
// that need to retain a value must copy it before calling Next again.
func (r *UpsideDownCouchFieldDict) Next() (*index.DictEntry, error) {
	key, val, valid := r.iterator.Current()
	if !valid {
		return nil, nil
	}

	if err := r.dictRow.parseDictionaryK(key); err != nil {
		return nil, fmt.Errorf("unexpected error parsing dictionary row key: %v", err)
	}
	if err := r.dictRow.parseDictionaryV(val); err != nil {
		return nil, fmt.Errorf("unexpected error parsing dictionary row val: %v", err)
	}

	entry := r.dictEntry
	entry.Term = string(r.dictRow.term)
	entry.Count = r.dictRow.count

	// advance the iterator to the next term
	r.iterator.Next()
	return entry, nil
}

// Cardinality always reports 0 for this implementation.
func (r *UpsideDownCouchFieldDict) Cardinality() int {
	return 0
}

// Close releases the underlying KV iterator and returns its close error.
func (r *UpsideDownCouchFieldDict) Close() error {
	return r.iterator.Close()
}
================================================
FILE: index/upsidedown/field_dict_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
index "github.com/blevesearch/bleve_index_api"
)
// TestIndexFieldDict indexes two documents into a boltdb-backed index and
// checks the terms returned by the field dictionary for a whole field, for a
// start/end term range, and for a term prefix.
//
// Setup failures abort the test with t.Fatal/t.Fatalf: continuing after a
// failed Reader or FieldDict call would dereference a nil value in the
// deferred Close. Errors returned while iterating a dictionary are reported
// instead of silently ending the iteration.
func TestIndexFieldDict(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Fatalf("Error updating index: %v", err)
	}

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("prefix", []uint64{}, []byte("bob cat cats catting dog doggy zoo"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Fatalf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// checkTerms drains a dictionary through its Next method and compares
	// the terms, in iteration order, against expected.
	checkTerms := func(next func() (*index.DictEntry, error), expected []string) {
		terms := make([]string, 0)
		for {
			curr, err := next()
			if err != nil {
				t.Fatalf("error iterating field dictionary: %v", err)
			}
			if curr == nil {
				break
			}
			// entries are reused by the dictionary, so keep only the term
			terms = append(terms, curr.Term)
		}
		if len(terms) != len(expected) {
			t.Errorf("expected %d term for this field, got %d", len(expected), len(terms))
		}
		if !reflect.DeepEqual(expected, terms) {
			t.Errorf("expected %#v, got %#v", expected, terms)
		}
	}

	// whole field, single distinct term
	dict, err := indexReader.FieldDict("name")
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict.Next, []string{"test"})

	// whole field, several terms
	dict2, err := indexReader.FieldDict("desc")
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict2.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict2.Next, []string{"eat", "more", "rice"})

	// test start and end range
	dict3, err := indexReader.FieldDictRange("desc", []byte("fun"), []byte("nice"))
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict3.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict3.Next, []string{"more"})

	// test use case for prefix
	dict4, err := indexReader.FieldDictPrefix("prefix", []byte("cat"))
	if err != nil {
		t.Fatalf("error creating reader: %v", err)
	}
	defer func() {
		err := dict4.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	checkTerms(dict4.Next, []string{"cat", "cats", "catting"})
}
================================================
FILE: index/upsidedown/index_reader.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"context"
"reflect"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
store "github.com/blevesearch/upsidedown_store_api"
)
// reflectStaticSizeIndexReader holds the static size in bytes of an
// IndexReader value as reported by reflect. It is filled in once by init.
var reflectStaticSizeIndexReader int

func init() {
	reflectStaticSizeIndexReader = int(reflect.TypeOf(IndexReader{}).Size())
}
// IndexReader provides read access to an UpsideDownCouch index on top of a
// store.KVReader.
type IndexReader struct {
	// index is the owning index; its field cache is used to translate
	// between field names and field indexes.
	index *UpsideDownCouch
	// kvreader is the key/value reader all lookups go through; it is closed
	// by Close.
	kvreader store.KVReader
	// docCount is the value reported by DocCount.
	docCount uint64
}
// TermFieldReader returns a reader over the documents that contain term in
// the field named fieldName. ctx is accepted but not consulted.
//
// When the field is not present in the field cache, a reader is still
// returned, but it is built for the single-byte term {ByteSeparator} in field
// index ^uint16(0) instead of the requested term.
func (i *IndexReader) TermFieldReader(ctx context.Context, term []byte, fieldName string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
	fieldIndex, known := i.index.fieldCache.FieldNamed(fieldName, false)
	if !known {
		return newUpsideDownCouchTermFieldReader(i, []byte{ByteSeparator}, ^uint16(0), includeFreq, includeNorm, includeTermVectors)
	}
	return newUpsideDownCouchTermFieldReader(i, term, uint16(fieldIndex), includeFreq, includeNorm, includeTermVectors)
}
// FieldDict returns a dictionary over the terms of the named field. It
// delegates to FieldDictRange with neither a start nor an end term.
func (i *IndexReader) FieldDict(fieldName string) (index.FieldDict, error) {
	return i.FieldDictRange(fieldName, nil, nil)
}
// FieldDictRange returns a dictionary over the terms of the named field,
// bounded by startTerm and endTerm.
//
// When the field is not present in the field cache, the dictionary is built
// for field index ^uint16(0) with start term {ByteSeparator} and an empty end
// term instead of the requested bounds.
func (i *IndexReader) FieldDictRange(fieldName string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	fieldIndex, known := i.index.fieldCache.FieldNamed(fieldName, false)
	if !known {
		return newUpsideDownCouchFieldDict(i, ^uint16(0), []byte{ByteSeparator}, []byte{})
	}
	return newUpsideDownCouchFieldDict(i, uint16(fieldIndex), startTerm, endTerm)
}
// FieldDictPrefix returns a dictionary for the named field restricted by
// termPrefix. It delegates to FieldDictRange, passing termPrefix as both the
// start term and the end term.
func (i *IndexReader) FieldDictPrefix(fieldName string, termPrefix []byte) (index.FieldDict, error) {
	return i.FieldDictRange(fieldName, termPrefix, termPrefix)
}
// DocIDReaderAll returns a document ID reader that is not restricted to any
// particular set of IDs.
func (i *IndexReader) DocIDReaderAll() (index.DocIDReader, error) {
	return newUpsideDownCouchDocIDReader(i)
}
// DocIDReaderOnly returns a document ID reader restricted to the given
// external IDs.
func (i *IndexReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
	return newUpsideDownCouchDocIDReaderOnly(i, ids)
}
// Document loads the stored fields of the document with the given ID.
//
// It returns (nil, nil) when the back index has no row for the document. An
// error from closing the stored-row iterator is reported only if no earlier
// error occurred.
func (i *IndexReader) Document(id string) (doc index.Document, err error) {
	// The back index tells us whether the document exists at all.
	var backRow *BackIndexRow
	backRow, err = backIndexRowForDoc(i.kvreader, []byte(id))
	if err != nil || backRow == nil {
		return
	}

	rvd := document.NewDocument(id)
	prefixRow := NewStoredRow([]byte(id), 0, []uint64{}, 'x', nil)
	it := i.kvreader.PrefixIterator(prefixRow.ScanPrefixForDoc())
	defer func() {
		cerr := it.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	for key, val, valid := it.Current(); valid; key, val, valid = it.Current() {
		// Parse from a private copy of the value rather than the slice
		// handed out by the iterator.
		owned := make([]byte, len(val))
		copy(owned, val)

		var row *StoredRow
		row, err = NewStoredRowKV(key, owned)
		if err != nil {
			return nil, err
		}
		if row != nil {
			name := i.index.fieldCache.FieldIndexed(row.field)
			if field := decodeFieldType(row.typ, name, row.arrayPositions, row.value); field != nil {
				rvd.AddField(field)
			}
		}
		it.Next()
	}
	return rvd, nil
}
// documentVisitFieldTerms reads the back index row of document id and calls
// visitor with (field name, term) for every entry whose field is one of
// fields. Field names missing from the field cache are ignored. It returns
// nil without calling visitor when no back index value is stored for id.
func (i *IndexReader) documentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocValueVisitor) error {
	// Map field index -> requested field name.
	wanted := make(map[uint16]string, len(fields))
	for _, name := range fields {
		if fieldID, ok := i.index.fieldCache.FieldNamed(name, false); ok {
			wanted[fieldID] = name
		}
	}

	backRow := BackIndexRow{
		doc: id,
	}

	// Borrow a row buffer for the key, growing it when the key does not fit.
	keyBuf := GetRowBuffer()
	if need := backRow.KeySize(); need > len(keyBuf.buf) {
		keyBuf.buf = make([]byte, 2*need)
	}
	defer PutRowBuffer(keyBuf)

	keyLen, err := backRow.KeyTo(keyBuf.buf)
	if err != nil {
		return err
	}

	value, err := i.kvreader.Get(keyBuf.buf[:keyLen])
	if err != nil {
		return err
	}
	if value == nil {
		return nil
	}

	return visitBackIndexRow(value, func(fieldID uint32, term []byte) {
		if name, ok := wanted[uint16(fieldID)]; ok {
			visitor(name, term)
		}
	})
}
// Fields returns the names of all field rows found under the 'f' key prefix.
//
// If a row fails to parse, the result is (nil, err). An error from closing
// the iterator is reported only if no earlier error occurred.
func (i *IndexReader) Fields() (fields []string, err error) {
	fields = make([]string, 0)
	it := i.kvreader.PrefixIterator([]byte{'f'})
	defer func() {
		cerr := it.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	for key, val, valid := it.Current(); valid; key, val, valid = it.Current() {
		var row UpsideDownCouchRow
		row, err = ParseFromKeyValue(key, val)
		if err != nil {
			return nil, err
		}
		// A nil row simply fails the type assertion.
		if fieldRow, ok := row.(*FieldRow); ok {
			fields = append(fields, fieldRow.name)
		}
		it.Next()
	}
	return fields, nil
}
// GetInternal returns the value stored under the internal-row key derived
// from key, as reported by the underlying key/value reader.
func (i *IndexReader) GetInternal(key []byte) ([]byte, error) {
	// Only the key half of the row is needed for the lookup.
	internalRow := NewInternalRow(key, nil)
	return i.kvreader.Get(internalRow.Key())
}
// DocCount returns the document count held by this reader. The returned
// error is always nil.
func (i *IndexReader) DocCount() (uint64, error) {
	return i.docCount, nil
}
// Close closes the underlying key/value reader and returns its error.
func (i *IndexReader) Close() error {
	return i.kvreader.Close()
}
// ExternalID converts an internal ID to its external form by converting its
// bytes directly to a string. The returned error is always nil.
func (i *IndexReader) ExternalID(id index.IndexInternalID) (string, error) {
	return string(id), nil
}
// InternalID converts an external ID to its internal form by direct type
// conversion. The returned error is always nil.
func (i *IndexReader) InternalID(id string) (index.IndexInternalID, error) {
	return index.IndexInternalID(id), nil
}
// incrementBytes returns a copy of in incremented by one, treating the slice
// as a big-endian unsigned number of fixed width. The carry moves from the
// last byte towards the first; a value made entirely of 0xff bytes wraps
// around to all zeros. The input slice is never modified.
func incrementBytes(in []byte) []byte {
	out := append(make([]byte, 0, len(in)), in...)
	for pos := len(out) - 1; pos >= 0; pos-- {
		// A non-zero byte after the increment means there is no carry left.
		if out[pos]++; out[pos] != 0 {
			break
		}
	}
	return out
}
// DocValueReader returns a DocValueReader bound to this index reader and the
// given field names. The returned error is always nil.
func (i *IndexReader) DocValueReader(fields []string) (index.DocValueReader, error) {
	return &DocValueReader{i: i, fields: fields}, nil
}
// DocValueReader visits the terms of a fixed set of fields for individual
// documents, using an IndexReader.
type DocValueReader struct {
	// i is the index reader used to look documents up.
	i *IndexReader
	// fields lists the field names whose terms are passed to the visitor.
	fields []string
}
// VisitDocValues calls visitor for the terms of document id in the fields
// this reader was created with, by delegating to the index reader's
// documentVisitFieldTerms.
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID,
	visitor index.DocValueVisitor) error {
	return dvr.i.documentVisitFieldTerms(id, dvr.fields, visitor)
}
// BytesRead always returns 0.
func (dvr *DocValueReader) BytesRead() uint64 { return 0 }
================================================
FILE: index/upsidedown/protoc-README.md
================================================
## Instructions for generating new go stubs using upsidedown.proto
1. Install the latest version of protoc-gen-go:
```
go install google.golang.org/protobuf/cmd/protoc-gen-go@latest
```
2. Generate `upsidedown.pb.go` from `upsidedown.proto`:
```
protoc --go_out=. --go_opt=Mindex/upsidedown/upsidedown.proto=index/upsidedown/ index/upsidedown/upsidedown.proto
```
3. Manually add back Size and MarshalTo methods for BackIndexRowValue, BackIndexTermsEntry, BackIndexStoreEntry to support upside_down.
================================================
FILE: index/upsidedown/reader.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"reflect"
"sort"
"sync/atomic"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/upsidedown_store_api"
)
// Static sizes in bytes, as reported by reflect, of the reader types declared
// in this file. They are filled in once by init and used by the Size methods.
var (
	reflectStaticSizeUpsideDownCouchTermFieldReader int
	reflectStaticSizeUpsideDownCouchDocIDReader     int
)

func init() {
	reflectStaticSizeUpsideDownCouchTermFieldReader =
		int(reflect.TypeOf(UpsideDownCouchTermFieldReader{}).Size())
	reflectStaticSizeUpsideDownCouchDocIDReader =
		int(reflect.TypeOf(UpsideDownCouchDocIDReader{}).Size())
}
// UpsideDownCouchTermFieldReader enumerates the term frequency rows of one
// term within one field.
type UpsideDownCouchTermFieldReader struct {
	// count is the value reported by Count.
	count uint64
	// indexReader is the reader this term field reader was created from.
	indexReader *IndexReader
	// iterator walks the term frequency rows; Next and Advance return
	// nothing when it is nil.
	iterator store.KVIterator
	// term is the term being read.
	term []byte
	// tfrNext is the row that Next and Advance parse into. Next also treats
	// a nil value as "the iterator has not been consumed yet".
	tfrNext *TermFrequencyRow
	// tfrPrealloc is storage that tfrNext can point at.
	tfrPrealloc TermFrequencyRow
	// keyBuf is a reusable buffer for the seek key built by Advance.
	keyBuf []byte
	// field is the index of the field being read.
	field uint16
	// includeTermVectors is passed to the row value parser.
	includeTermVectors bool
}
// Size returns the sum of the static size of the reader, one pointer size,
// the lengths of term and keyBuf, the size of the preallocated row and,
// when set, the size of the row tfrNext points to.
func (r *UpsideDownCouchTermFieldReader) Size() int {
	total := reflectStaticSizeUpsideDownCouchTermFieldReader + size.SizeOfPtr
	total += len(r.term)
	total += r.tfrPrealloc.Size()
	total += len(r.keyBuf)
	if r.tfrNext != nil {
		total += r.tfrNext.Size()
	}
	return total
}
// newUpsideDownCouchTermFieldReader creates a reader over the term frequency
// rows of term in field.
//
// The dictionary row for (field, term) is looked up first. When it does not
// exist, the returned reader has no iterator and a count of 0, so Next and
// Advance yield nothing. Otherwise the count is parsed from the dictionary
// row and a prefix iterator over the matching term frequency rows is opened.
//
// Every successfully created reader increments the termSearchersStarted
// statistic. Both kinds of reader keep a reference to indexReader so that
// Close increments termSearchersFinished in either case and the two counters
// stay balanced.
//
// includeFreq and includeNorm are accepted but not consulted here.
func newUpsideDownCouchTermFieldReader(indexReader *IndexReader, term []byte, field uint16, includeFreq, includeNorm, includeTermVectors bool) (*UpsideDownCouchTermFieldReader, error) {
	// One buffer is sized to hold either the dictionary row key or the term
	// frequency row key, whichever is larger.
	bufNeeded := termFrequencyRowKeySize(term, nil)
	if dictSize := dictionaryRowKeySize(term); bufNeeded < dictSize {
		bufNeeded = dictSize
	}
	buf := make([]byte, bufNeeded)

	bufUsed := dictionaryRowKeyTo(buf, field, term)
	val, err := indexReader.kvreader.Get(buf[:bufUsed])
	if err != nil {
		return nil, err
	}
	if val == nil {
		// No dictionary row: build a reader without an iterator.
		atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
		rv := &UpsideDownCouchTermFieldReader{
			// Kept so that Close records the matching "finished" statistic.
			indexReader:        indexReader,
			count:              0,
			term:               term,
			field:              field,
			includeTermVectors: includeTermVectors,
		}
		rv.tfrNext = &rv.tfrPrealloc
		return rv, nil
	}

	count, err := dictionaryRowParseV(val)
	if err != nil {
		return nil, err
	}

	bufUsed = termFrequencyRowKeyTo(buf, field, term, nil)
	it := indexReader.kvreader.PrefixIterator(buf[:bufUsed])

	atomic.AddUint64(&indexReader.index.stats.termSearchersStarted, uint64(1))
	return &UpsideDownCouchTermFieldReader{
		indexReader:        indexReader,
		iterator:           it,
		count:              count,
		term:               term,
		field:              field,
		includeTermVectors: includeTermVectors,
	}, nil
}
// Count returns the count recorded when the reader was created.
func (r *UpsideDownCouchTermFieldReader) Count() uint64 {
	return r.count
}
// Next returns the next document for the term, or (nil, nil) when the reader
// has no iterator or the iterator is exhausted.
//
// When preAlloced is non-nil it is filled in and returned instead of a new
// TermFieldDoc. The document ID is appended to preAlloced.ID, and Vectors is
// only assigned when the row carries vectors, so values already present in
// preAlloced are not cleared here.
func (r *UpsideDownCouchTermFieldReader) Next(preAlloced *index.TermFieldDoc) (*index.TermFieldDoc, error) {
	if r.iterator == nil {
		return nil, nil
	}

	// tfrNext doubles as an initialization flag: while it is nil the
	// iterator still sits on its first row and must not be moved.
	if r.tfrNext == nil {
		r.tfrNext = &r.tfrPrealloc
	} else {
		r.iterator.Next()
	}

	key, val, valid := r.iterator.Current()
	if !valid {
		return nil, nil
	}

	row := r.tfrNext
	if err := row.parseKDoc(key, r.term); err != nil {
		return nil, err
	}
	if err := row.parseV(val, r.includeTermVectors); err != nil {
		return nil, err
	}

	doc := preAlloced
	if doc == nil {
		doc = &index.TermFieldDoc{}
	}
	doc.ID = append(doc.ID, row.doc...)
	doc.Freq = row.freq
	doc.Norm = float64(row.norm)
	if row.vectors != nil {
		doc.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(row.vectors)
	}
	return doc, nil
}
// Advance seeks the iterator to the term frequency row key built for docID
// and returns the row found at the resulting position, or (nil, nil) when
// the reader has no iterator or the position is not valid.
//
// preAlloced is handled as in Next: when non-nil it is filled in and
// returned, with the document ID appended to its ID field.
func (r *UpsideDownCouchTermFieldReader) Advance(docID index.IndexInternalID, preAlloced *index.TermFieldDoc) (rv *index.TermFieldDoc, err error) {
	if r.iterator == nil {
		return nil, nil
	}

	if r.tfrNext == nil {
		r.tfrNext = &TermFrequencyRow{}
	}
	row := InitTermFrequencyRow(r.tfrNext, r.term, r.field, docID, 0, 0)

	// Build the seek key in the reusable buffer.
	if r.keyBuf, err = row.KeyAppendTo(r.keyBuf[:0]); err != nil {
		return nil, err
	}
	r.iterator.Seek(r.keyBuf)

	key, val, valid := r.iterator.Current()
	if !valid {
		return nil, nil
	}

	if err = row.parseKDoc(key, r.term); err != nil {
		return nil, err
	}
	if err = row.parseV(val, r.includeTermVectors); err != nil {
		return nil, err
	}

	rv = preAlloced
	if rv == nil {
		rv = &index.TermFieldDoc{}
	}
	rv.ID = append(rv.ID, row.doc...)
	rv.Freq = row.freq
	rv.Norm = float64(row.norm)
	if row.vectors != nil {
		rv.Vectors = r.indexReader.index.termFieldVectorsFromTermVectors(row.vectors)
	}
	return rv, nil
}
// Close increments the termSearchersFinished statistic when the reader holds
// an index reader, then closes the iterator if there is one and returns its
// error.
func (r *UpsideDownCouchTermFieldReader) Close() error {
	if ir := r.indexReader; ir != nil {
		atomic.AddUint64(&ir.index.stats.termSearchersFinished, uint64(1))
	}
	if r.iterator == nil {
		return nil
	}
	return r.iterator.Close()
}
// UpsideDownCouchDocIDReader enumerates document IDs by iterating over back
// index rows, optionally restricted to a fixed set of IDs ("only" mode).
type UpsideDownCouchDocIDReader struct {
	// indexReader is the reader this document ID reader was created from.
	indexReader *IndexReader
	// iterator is a range iterator over back index row keys.
	iterator store.KVIterator
	// only is the sorted copy of the IDs to restrict to (only mode).
	only []string
	// onlyPos is the current position within only.
	onlyPos int
	// onlyMode is true when the reader was created for a restricted ID set.
	onlyMode bool
}
// Size returns the static sizes of the reader and of an IndexReader, one
// pointer size, and for every entry of only the string header size plus the
// length of the string.
func (r *UpsideDownCouchDocIDReader) Size() int {
	total := reflectStaticSizeUpsideDownCouchDocIDReader + reflectStaticSizeIndexReader + size.SizeOfPtr
	for idx := range r.only {
		total += size.SizeOfString + len(r.only[idx])
	}
	return total
}
// newUpsideDownCouchDocIDReader creates an unrestricted document ID reader.
// It iterates the back index rows between the keys built for document IDs
// {0x00} and {0xff}. The returned error is always nil.
func newUpsideDownCouchDocIDReader(indexReader *IndexReader) (*UpsideDownCouchDocIDReader, error) {
	firstKey := NewBackIndexRow([]byte{0x0}, nil, nil).Key()
	lastKey := NewBackIndexRow([]byte{0xff}, nil, nil).Key()

	return &UpsideDownCouchDocIDReader{
		indexReader: indexReader,
		iterator:    indexReader.kvreader.RangeIterator(firstKey, lastKey),
	}, nil
}
// newUpsideDownCouchDocIDReaderOnly creates a document ID reader restricted
// to ids. The range iterator spans from the smallest requested ID to the
// incremented largest requested ID; with no IDs it spans {0x00} to {0xff}.
// The returned error is always nil.
func newUpsideDownCouchDocIDReaderOnly(indexReader *IndexReader, ids []string) (*UpsideDownCouchDocIDReader, error) {
	// The caller owns ids, so sort a private copy instead of sorting in place.
	sorted := make([]string, len(ids))
	copy(sorted, ids)
	sort.Strings(sorted)

	startBytes, endBytes := []byte{0x0}, []byte{0xff}
	if n := len(sorted); n > 0 {
		startBytes = []byte(sorted[0])
		endBytes = incrementBytes([]byte(sorted[n-1]))
	}

	startKey := NewBackIndexRow(startBytes, nil, nil).Key()
	endKey := NewBackIndexRow(endBytes, nil, nil).Key()

	return &UpsideDownCouchDocIDReader{
		indexReader: indexReader,
		iterator:    indexReader.kvreader.RangeIterator(startKey, endKey),
		only:        sorted,
		onlyMode:    true,
	}, nil
}
// Next returns the next document ID, or (nil, nil) when there is none.
//
// In only mode the iterator is repeatedly re-positioned on the back index
// key of the current wanted ID until a row whose document matches that ID is
// found; before returning, the reader moves on to the next distinct wanted
// ID and seeks to it. Outside only mode the document of the current row is
// returned and the iterator is moved forward by one row. The returned ID is
// always a fresh copy of the row's document bytes.
func (r *UpsideDownCouchDocIDReader) Next() (index.IndexInternalID, error) {
	key, val, valid := r.iterator.Current()

	if r.onlyMode {
		var rv index.IndexInternalID
		for valid && r.onlyPos < len(r.only) {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
				// the current row is not the wanted id: try the next
				// distinct wanted id, or give up if there is none
				ok := r.nextOnly()
				if !ok {
					return nil, nil
				}
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
				key, val, valid = r.iterator.Current()
				continue
			} else {
				rv = append([]byte(nil), br.doc...)
				break
			}
		}
		if valid && r.onlyPos < len(r.only) {
			// a match was found: position the reader for the following call
			ok := r.nextOnly()
			if ok {
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
			}
			return rv, nil
		}
	} else {
		if valid {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			rv := append([]byte(nil), br.doc...)
			r.iterator.Next()
			return rv, nil
		}
	}
	return nil, nil
}
// Advance positions the reader at docID and returns the document ID found
// there, or (nil, nil) when there is none.
//
// In only mode the position within the wanted IDs is recomputed with a
// binary search for docID, so the reader can also move backwards. If the row
// found after seeking does not belong to the wanted ID, the search continues
// from the document that was actually found. Outside only mode the iterator
// seeks to the back index key of docID, the document of the row at the
// resulting position is returned, and the iterator is moved forward by one
// row. The returned ID is always a fresh copy of the row's document bytes.
func (r *UpsideDownCouchDocIDReader) Advance(docID index.IndexInternalID) (index.IndexInternalID, error) {
	if r.onlyMode {
		r.onlyPos = sort.SearchStrings(r.only, string(docID))
		if r.onlyPos >= len(r.only) {
			// advanced to key after our last only key
			return nil, nil
		}
		r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
		key, val, valid := r.iterator.Current()

		var rv index.IndexInternalID
		for valid && r.onlyPos < len(r.only) {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			if !bytes.Equal(br.doc, []byte(r.only[r.onlyPos])) {
				// the only key we seek'd to didn't exist
				// now look for the closest key that did exist in only
				r.onlyPos = sort.SearchStrings(r.only, string(br.doc))
				if r.onlyPos >= len(r.only) {
					// advanced to key after our last only key
					return nil, nil
				}
				// now seek to this new only key
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
				key, val, valid = r.iterator.Current()
				continue
			} else {
				rv = append([]byte(nil), br.doc...)
				break
			}
		}
		if valid && r.onlyPos < len(r.only) {
			// a match was found: position the reader for the following call
			ok := r.nextOnly()
			if ok {
				r.iterator.Seek(NewBackIndexRow([]byte(r.only[r.onlyPos]), nil, nil).Key())
			}
			return rv, nil
		}
	} else {
		bir := NewBackIndexRow(docID, nil, nil)
		r.iterator.Seek(bir.Key())
		key, val, valid := r.iterator.Current()
		if valid {
			br, err := NewBackIndexRowKV(key, val)
			if err != nil {
				return nil, err
			}
			rv := append([]byte(nil), br.doc...)
			r.iterator.Next()
			return rv, nil
		}
	}
	return nil, nil
}
// Close closes the underlying iterator and returns its error.
func (r *UpsideDownCouchDocIDReader) Close() error {
	return r.iterator.Close()
}
// nextOnly moves onlyPos to the next entry of only that differs from its
// predecessor. Because only is sorted, this skips duplicate IDs. It reports
// whether onlyPos still refers to an entry, i.e. false once the end of the
// list has been reached.
func (r *UpsideDownCouchDocIDReader) nextOnly() bool {
	pos := r.onlyPos + 1
	// Step over entries equal to the one just before them.
	for pos < len(r.only) && r.only[pos] == r.only[pos-1] {
		pos++
	}
	r.onlyPos = pos
	return pos < len(r.only)
}
================================================
FILE: index/upsidedown/reader_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
index "github.com/blevesearch/bleve_index_api"
)
// TestIndexReader exercises term field readers obtained from an index
// reader: a missing term, an existing term (count and enumeration), a match
// with term vectors, Advance, and a field that does not exist.
//
// Setup failures abort the test immediately so that later steps do not
// dereference nil values, and every term field reader that is opened is also
// closed.
func TestIndexReader(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test test test"), testAnalyzer))
	doc.AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors, testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// first look for a term that doesn't exist
	reader, err := indexReader.TermFieldReader(context.TODO(), []byte("nope"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count := reader.Count()
	if count != 0 {
		t.Errorf("Expected doc count to be: %d got: %d", 0, count)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}

	count = reader.Count()
	if count != expectedCount {
		t.Errorf("Expected doc count to be: %d got: %d", expectedCount, count)
	}

	// enumerate every match and make sure the number seen agrees with Count
	var match *index.TermFieldDoc
	var actualCount uint64
	match, err = reader.Next(nil)
	for err == nil && match != nil {
		actualCount++
		match, err = reader.Next(nil)
	}
	if err != nil {
		t.Errorf("unexpected error reading next: %v", err)
	}
	if actualCount != count {
		t.Errorf("count was %d, but only saw %d", count, actualCount)
	}

	expectedMatch := &index.TermFieldDoc{
		ID:   index.IndexInternalID("2"),
		Freq: 1,
		Norm: 0.5773502588272095,
		Vectors: []*index.TermFieldVector{
			{
				Field: "desc",
				Pos:   3,
				Start: 9,
				End:   13,
			},
		},
	}
	tfr, err := indexReader.TermFieldReader(context.TODO(), []byte("rice"), "desc", true, true, true)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	match, err = tfr.Next(nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if !reflect.DeepEqual(expectedMatch, match) {
		t.Errorf("got %#v, expected %#v", match, expectedMatch)
	}
	err = tfr.Close()
	if err != nil {
		t.Fatal(err)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test usage of advance
	reader, err = indexReader.TermFieldReader(context.TODO(), []byte("test"), "name", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}

	match, err = reader.Advance(index.IndexInternalID("2"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match == nil {
		t.Fatalf("Expected match, got nil")
	}
	if !match.ID.Equals(index.IndexInternalID("2")) {
		t.Errorf("Expected ID '2', got '%s'", match.ID)
	}
	match, err = reader.Advance(index.IndexInternalID("3"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// now test creating a reader for a field that doesn't exist
	reader, err = indexReader.TermFieldReader(context.TODO(), []byte("water"), "doesnotexist", true, true, true)
	if err != nil {
		t.Fatalf("Error accessing term field reader: %v", err)
	}
	count = reader.Count()
	if count != 0 {
		t.Errorf("expected count 0 for reader of non-existent field")
	}
	match, err = reader.Next(nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	match, err = reader.Advance(index.IndexInternalID("anywhere"), nil)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match != nil {
		t.Errorf("expected nil, got %v", match)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexDocIdReader indexes two documents and checks the unrestricted
// document ID reader: a full enumeration must yield both documents, and
// Advance must find an existing ID and return nil past the last one.
func TestIndexDocIdReader(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// index two documents, counting them as we go
	var expectedCount uint64
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test test test")))
	doc.AddField(document.NewTextFieldWithIndexingOptions("desc", []uint64{}, []byte("eat more rice"), index.IndexField|index.IncludeTermVectors))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	indexReader, err := idx.Reader()
	if err != nil {
		t.Error(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// first get all doc ids
	reader, err := indexReader.DocIDReaderAll()
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	id, err := reader.Next()
	if err != nil {
		t.Fatal(err)
	}
	count := uint64(0)
	for id != nil {
		count++
		id, err = reader.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != expectedCount {
		t.Errorf("expected %d, got %d", expectedCount, count)
	}

	// try it again, but jump to the second doc this time
	reader2, err := indexReader.DocIDReaderAll()
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader2.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader2.Advance(index.IndexInternalID("2"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("2")) {
		t.Errorf("expected to find id '2', got '%s'", id)
	}

	// advancing past the last document must yield nothing
	id, err = reader2.Advance(index.IndexInternalID("3"))
	if err != nil {
		t.Error(err)
	}
	if id != nil {
		t.Errorf("expected to find id '', got '%s'", id)
	}
}
// TestCrashBadBackIndexRow checks that a back index row with an empty value
// can be parsed without crashing and that the document ID is taken from the
// key.
func TestCrashBadBackIndexRow(t *testing.T) {
	br, err := NewBackIndexRowKV([]byte{byte('b'), byte('a'), ByteSeparator}, []byte{})
	if err != nil {
		t.Fatal(err)
	}
	if string(br.doc) != "a" {
		// err is nil at this point, so report the actual mismatch instead.
		t.Fatalf("expected doc id %q, got %q", "a", br.doc)
	}
}
// TestIndexDocIdOnlyReader indexes documents "1", "3", "5", "7" and "9" and
// checks the restricted ("only") document ID reader: plain enumeration,
// Advance, wanted IDs that are not in the index, and interleaved Next and
// Advance calls, including advancing backwards.
func TestIndexDocIdOnlyReader(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// index five empty documents with odd ids
	doc := document.NewDocument("1")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("3")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("5")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("7")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	doc = document.NewDocument("9")
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		t.Error(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// every wanted id exists: all three must be enumerated
	onlyIds := []string{"1", "5", "9"}
	reader, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	id, err := reader.Next()
	if err != nil {
		t.Fatal(err)
	}
	count := uint64(0)
	for id != nil {
		count++
		id, err = reader.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != 3 {
		t.Errorf("expected 3, got %d", count)
	}

	// try it again, but jump
	reader2, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader2.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader2.Advance(index.IndexInternalID("5"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("5")) {
		t.Errorf("expected to find id '5', got '%s'", id)
	}

	id, err = reader2.Advance(index.IndexInternalID("a"))
	if err != nil {
		t.Error(err)
	}
	if id != nil {
		t.Errorf("expected to find id '', got '%s'", id)
	}

	// some keys aren't actually there
	onlyIds = []string{"0", "2", "4", "5", "6", "8", "a"}
	reader3, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader3.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	id, err = reader3.Next()
	if err != nil {
		t.Fatal(err)
	}
	count = uint64(0)
	for id != nil {
		count++
		id, err = reader3.Next()
		if err != nil {
			t.Fatal(err)
		}
	}
	if count != 1 {
		t.Errorf("expected 1, got %d", count)
	}

	// mix advance and next
	onlyIds = []string{"0", "1", "3", "5", "6", "9"}
	reader4, err := indexReader.DocIDReaderOnly(onlyIds)
	if err != nil {
		t.Errorf("Error accessing doc id reader: %v", err)
	}
	defer func() {
		err := reader4.Close()
		if err != nil {
			t.Error(err)
		}
	}()

	// first key is "1"
	id, err = reader4.Next()
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("1")) {
		t.Errorf("expected to find id '1', got '%s'", id)
	}

	// advancing to key we dont have gives next
	id, err = reader4.Advance(index.IndexInternalID("2"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("3")) {
		t.Errorf("expected to find id '3', got '%s'", id)
	}

	// next after advance works
	id, err = reader4.Next()
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("5")) {
		t.Errorf("expected to find id '5', got '%s'", id)
	}

	// advancing to key we do have works
	id, err = reader4.Advance(index.IndexInternalID("9"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("9")) {
		t.Errorf("expected to find id '9', got '%s'", id)
	}

	// advance backwards at end
	id, err = reader4.Advance(index.IndexInternalID("4"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("5")) {
		t.Errorf("expected to find id '5', got '%s'", id)
	}

	// next after advance works
	id, err = reader4.Next()
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("9")) {
		t.Errorf("expected to find id '9', got '%s'", id)
	}

	// advance backwards to key that exists, but not in only set
	id, err = reader4.Advance(index.IndexInternalID("7"))
	if err != nil {
		t.Error(err)
	}
	if !id.Equals(index.IndexInternalID("9")) {
		t.Errorf("expected to find id '9', got '%s'", id)
	}
}
================================================
FILE: index/upsidedown/row.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"encoding/binary"
"fmt"
"io"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/size"
"google.golang.org/protobuf/proto"
)
// Static sizes in bytes, as reported by reflect, of TermFrequencyRow and
// TermVector values. They are filled in once by init.
var (
	reflectStaticSizeTermFrequencyRow int
	reflectStaticSizeTermVector       int
)

func init() {
	reflectStaticSizeTermFrequencyRow = int(reflect.TypeOf(TermFrequencyRow{}).Size())
	reflectStaticSizeTermVector = int(reflect.TypeOf(TermVector{}).Size())
}
// ByteSeparator is the byte (0xff) used as a terminator in row encodings,
// for example after the field name in a FieldRow value.
const ByteSeparator byte = 0xff
// UpsideDownCouchRowStream is a channel of UpsideDownCouchRow values.
type UpsideDownCouchRowStream chan UpsideDownCouchRow
// UpsideDownCouchRow is the interface shared by all row types. It exposes
// the encoded key and value of a row, either as newly returned slices or
// written into a caller-supplied buffer.
type UpsideDownCouchRow interface {
	// KeySize returns the number of bytes of the encoded key.
	KeySize() int
	// KeyTo writes the encoded key into the buffer and returns the number of
	// bytes written.
	KeyTo([]byte) (int, error)
	// Key returns the encoded key.
	Key() []byte
	// Value returns the encoded value.
	Value() []byte
	// ValueSize returns the number of bytes of the encoded value.
	ValueSize() int
	// ValueTo writes the encoded value into the buffer and returns the
	// number of bytes written.
	ValueTo([]byte) (int, error)
}
// ParseFromKeyValue decodes a stored key/value pair into the row type
// selected by the first byte of key: 'v' version, 'f' field, 'd' dictionary,
// 't' term frequency, 'b' back index, 's' stored and 'i' internal. It returns
// an error for an empty key or an unrecognised first byte.
func ParseFromKeyValue(key, value []byte) (UpsideDownCouchRow, error) {
	if len(key) == 0 {
		return nil, fmt.Errorf("Invalid empty key")
	}

	switch prefix := key[0]; prefix {
	case 'v':
		return NewVersionRowKV(key, value)
	case 'f':
		return NewFieldRowKV(key, value)
	case 'd':
		return NewDictionaryRowKV(key, value)
	case 't':
		return NewTermFrequencyRowKV(key, value)
	case 'b':
		return NewBackIndexRowKV(key, value)
	case 's':
		return NewStoredRowKV(key, value)
	case 'i':
		return NewInternalRowKV(key, value)
	default:
		return nil, fmt.Errorf("Unknown field type '%s'", string(prefix))
	}
}
// VERSION

// VersionRow is the row holding the index format version. Its key is the
// single byte 'v' and its value is the single version byte.
type VersionRow struct {
	version uint8
}

// Key returns the one-byte key 'v'.
func (v *VersionRow) Key() []byte {
	key := make([]byte, 1)
	key[0] = 'v'
	return key
}

// KeySize returns the length of the key, which is always 1.
func (v *VersionRow) KeySize() int {
	return 1
}

// KeyTo writes the key into buf and returns the number of bytes written.
// buf must have room for at least one byte.
func (v *VersionRow) KeyTo(buf []byte) (int, error) {
	const written = 1
	buf[0] = 'v'
	return written, nil
}

// Value returns the one-byte value holding the version.
func (v *VersionRow) Value() []byte {
	val := make([]byte, 1)
	val[0] = v.version
	return val
}

// ValueSize returns the length of the value, which is always 1.
func (v *VersionRow) ValueSize() int {
	return 1
}

// ValueTo writes the version byte into buf and returns the number of bytes
// written. buf must have room for at least one byte.
func (v *VersionRow) ValueTo(buf []byte) (int, error) {
	const written = 1
	buf[0] = v.version
	return written, nil
}

// String returns a human-readable description of the row.
func (v *VersionRow) String() string {
	return fmt.Sprintf("Version: %d", v.version)
}

// NewVersionRow creates a VersionRow for the given version.
func NewVersionRow(version uint8) *VersionRow {
	row := new(VersionRow)
	row.version = version
	return row
}

// NewVersionRowKV decodes a VersionRow from a stored key/value pair. Only
// the first byte of value is read; key is not inspected. An error is
// returned when value is empty.
func NewVersionRowKV(key, value []byte) (*VersionRow, error) {
	var version uint8
	if err := binary.Read(bytes.NewReader(value), binary.LittleEndian, &version); err != nil {
		return nil, err
	}
	return &VersionRow{version: version}, nil
}
// INTERNAL STORAGE

// InternalRow stores an arbitrary key/value pair inside the index. The
// encoded key is the byte 'i' followed by the user key; the value is stored
// unchanged.
type InternalRow struct {
	key []byte
	val []byte
}

// Key returns the encoded key: 'i' followed by the user key.
func (i *InternalRow) Key() []byte {
	buf := make([]byte, i.KeySize())
	// KeyTo never fails and buf is sized by KeySize, so the error is ignored.
	size, _ := i.KeyTo(buf)
	return buf[:size]
}

// KeySize returns the length of the encoded key.
func (i *InternalRow) KeySize() int {
	return len(i.key) + 1
}

// KeyTo writes the encoded key into buf and returns the number of bytes
// written. buf must have room for at least one byte; a user key that does
// not fit completely is truncated to the space available.
func (i *InternalRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'i'
	actual := copy(buf[1:], i.key)
	return 1 + actual, nil
}

// Value returns the stored value without copying it.
func (i *InternalRow) Value() []byte {
	return i.val
}

// ValueSize returns the length of the stored value.
func (i *InternalRow) ValueSize() int {
	return len(i.val)
}

// ValueTo copies the stored value into buf and returns the number of bytes
// copied.
func (i *InternalRow) ValueTo(buf []byte) (int, error) {
	actual := copy(buf, i.val)
	return actual, nil
}

// String returns a human-readable description of the row, showing key and
// value both as text and as hex.
func (i *InternalRow) String() string {
	return fmt.Sprintf("InternalStore - Key: %s (% x) Val: %s (% x)", i.key, i.key, i.val, i.val)
}

// NewInternalRow creates an InternalRow for the given user key and value.
// The slices are stored as-is, not copied.
func NewInternalRow(key, val []byte) *InternalRow {
	return &InternalRow{
		key: key,
		val: val,
	}
}

// NewInternalRowKV decodes an InternalRow from a stored key/value pair by
// dropping the leading type byte of key. The row refers to the given slices
// rather than to copies. An empty key yields an error instead of a panic.
func NewInternalRowKV(key, value []byte) (*InternalRow, error) {
	if len(key) == 0 {
		return nil, fmt.Errorf("Invalid empty key")
	}
	rv := InternalRow{}
	rv.key = key[1:]
	rv.val = value
	return &rv, nil
}
// FIELD definition

// FieldRow maps a numeric field index to its name. The key is the type byte
// 'f' followed by the little-endian uint16 index; the value is the name
// terminated by ByteSeparator.
type FieldRow struct {
	index uint16
	name  string
}

// Key returns the serialized key in a newly allocated slice.
func (f *FieldRow) Key() []byte {
	key := make([]byte, f.KeySize())
	n, _ := f.KeyTo(key) // KeyTo never returns an error
	return key[:n]
}

// KeySize returns the serialized key length: type byte plus two index bytes.
func (f *FieldRow) KeySize() int {
	return 3
}

// KeyTo writes the serialized key into buf, which must hold at least three
// bytes, and returns the number of bytes written.
func (f *FieldRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'f'
	binary.LittleEndian.PutUint16(buf[1:3], f.index)
	return 3, nil
}

// Value returns the serialized value: the field name followed by
// ByteSeparator.
func (f *FieldRow) Value() []byte {
	val := make([]byte, f.ValueSize())
	n, _ := f.ValueTo(val) // ValueTo never returns an error
	return val[:n]
}

// ValueSize returns the serialized value length: the name plus the
// separator byte.
func (f *FieldRow) ValueSize() int {
	return len(f.name) + 1
}

// ValueTo writes the name and the trailing separator into buf and returns
// the number of bytes written.
func (f *FieldRow) ValueTo(buf []byte) (int, error) {
	n := copy(buf, f.name)
	buf[n] = ByteSeparator
	return n + 1, nil
}

// String renders the row for debugging.
func (f *FieldRow) String() string {
	return fmt.Sprintf("Field: %d Name: %s", f.index, f.name)
}

// NewFieldRow builds a FieldRow for the given index and name.
func NewFieldRow(index uint16, name string) *FieldRow {
	return &FieldRow{index: index, name: name}
}

// NewFieldRowKV decodes a FieldRow from its serialized form. It fails when
// the key is too short to hold the type byte and index, or when the value
// contains no ByteSeparator.
func NewFieldRowKV(key, value []byte) (*FieldRow, error) {
	keyBuf := bytes.NewBuffer(key)
	// skip the leading type byte
	if _, err := keyBuf.ReadByte(); err != nil {
		return nil, err
	}

	row := &FieldRow{}
	if err := binary.Read(keyBuf, binary.LittleEndian, &row.index); err != nil {
		return nil, err
	}

	name, err := bytes.NewBuffer(value).ReadString(ByteSeparator)
	if err != nil {
		return nil, err
	}
	// the string read includes the separator; strip it
	row.name = name[:len(name)-1]
	return row, nil
}
// DICTIONARY

// DictionaryRowMaxValueSize is the buffer size sufficient for any encoded
// dictionary value (a single uvarint).
const DictionaryRowMaxValueSize = binary.MaxVarintLen64

// DictionaryRow holds a count for a term within a field. The key is the
// type byte 'd', the little-endian uint16 field and the raw term bytes; the
// value is the count encoded as a uvarint.
type DictionaryRow struct {
	term  []byte
	count uint64
	field uint16
}

// Key returns the serialized key in a newly allocated slice.
func (dr *DictionaryRow) Key() []byte {
	buf := make([]byte, dr.KeySize())
	size, _ := dr.KeyTo(buf) // KeyTo never returns an error
	return buf[:size]
}

// KeySize returns the serialized key length.
func (dr *DictionaryRow) KeySize() int {
	return dictionaryRowKeySize(dr.term)
}

// dictionaryRowKeySize returns the key length for the given term: three
// header bytes (type and field) plus the term.
func dictionaryRowKeySize(term []byte) int {
	return len(term) + 3
}

// KeyTo writes the serialized key into buf and returns the number of bytes
// written. buf must hold at least three bytes.
func (dr *DictionaryRow) KeyTo(buf []byte) (int, error) {
	return dictionaryRowKeyTo(buf, dr.field, dr.term), nil
}

// dictionaryRowKeyTo writes a dictionary key for field/term into buf and
// returns the number of bytes written.
func dictionaryRowKeyTo(buf []byte, field uint16, term []byte) int {
	buf[0] = 'd'
	binary.LittleEndian.PutUint16(buf[1:3], field)
	size := copy(buf[3:], term)
	return size + 3
}

// Value returns the serialized value (the uvarint-encoded count).
func (dr *DictionaryRow) Value() []byte {
	buf := make([]byte, dr.ValueSize())
	size, _ := dr.ValueTo(buf) // ValueTo never returns an error
	return buf[:size]
}

// ValueSize returns an upper bound on the serialized value length.
func (dr *DictionaryRow) ValueSize() int {
	return DictionaryRowMaxValueSize
}

// ValueTo writes the uvarint-encoded count into buf and returns the number
// of bytes written.
func (dr *DictionaryRow) ValueTo(buf []byte) (int, error) {
	used := binary.PutUvarint(buf, dr.count)
	return used, nil
}

// String renders the row for debugging.
func (dr *DictionaryRow) String() string {
	return fmt.Sprintf("Dictionary Term: `%s` Field: %d Count: %d ", string(dr.term), dr.field, dr.count)
}

// NewDictionaryRow builds a DictionaryRow. The term slice is retained, not
// copied.
func NewDictionaryRow(term []byte, field uint16, count uint64) *DictionaryRow {
	return &DictionaryRow{
		term:  term,
		field: field,
		count: count,
	}
}

// NewDictionaryRowKV decodes a DictionaryRow from a serialized key and
// value.
func NewDictionaryRowKV(key, value []byte) (*DictionaryRow, error) {
	rv, err := NewDictionaryRowK(key)
	if err != nil {
		return nil, err
	}

	err = rv.parseDictionaryV(value)
	if err != nil {
		return nil, err
	}
	return rv, nil
}

// NewDictionaryRowK decodes only the key portion of a DictionaryRow; the
// count is left at zero.
func NewDictionaryRowK(key []byte) (*DictionaryRow, error) {
	rv := &DictionaryRow{}
	err := rv.parseDictionaryK(key)
	if err != nil {
		return nil, err
	}
	return rv, nil
}

// parseDictionaryK fills field and term from a serialized key. The term is
// copied, reusing the row's existing term buffer when there is one. Keys
// shorter than the three-byte header are rejected with an error rather than
// causing an out-of-range panic.
func (dr *DictionaryRow) parseDictionaryK(key []byte) error {
	if len(key) < 3 {
		return fmt.Errorf("invalid dictionary key, no valid field")
	}
	dr.field = binary.LittleEndian.Uint16(key[1:3])
	if dr.term != nil {
		dr.term = dr.term[:0]
	}
	dr.term = append(dr.term, key[3:]...)
	return nil
}

// parseDictionaryV fills count from a serialized value.
func (dr *DictionaryRow) parseDictionaryV(value []byte) error {
	count, err := dictionaryRowParseV(value)
	if err != nil {
		return err
	}
	dr.count = count
	return nil
}

// dictionaryRowParseV decodes the uvarint count from a serialized value and
// fails when the value is empty, truncated or overflows 64 bits.
func dictionaryRowParseV(value []byte) (uint64, error) {
	count, nread := binary.Uvarint(value)
	if nread <= 0 {
		return 0, fmt.Errorf("DictionaryRow parse Uvarint error, nread: %d", nread)
	}
	return count, nil
}
// TERM FIELD FREQUENCY
type TermVector struct {
field uint16
arrayPositions []uint64
pos uint64
start uint64
end uint64
}
func (tv *TermVector) Size() int {
return reflectStaticSizeTermVector + size.SizeOfPtr +
len(tv.arrayPositions)*size.SizeOfUint64
}
func (tv *TermVector) String() string {
return fmt.Sprintf("Field: %d Pos: %d Start: %d End %d ArrayPositions: %#v", tv.field, tv.pos, tv.start, tv.end, tv.arrayPositions)
}
// TermFrequencyRow records the occurrence of a term in a field of a
// document, together with its frequency, norm and optional term vectors.
type TermFrequencyRow struct {
	term    []byte
	doc     []byte
	freq    uint64
	vectors []*TermVector
	norm    float32
	field   uint16
}

// Size estimates the memory used by the row in bytes: the static struct
// size, the term and doc bytes, and the size of every term vector.
func (tfr *TermFrequencyRow) Size() int {
	total := reflectStaticSizeTermFrequencyRow + len(tfr.term) + len(tfr.doc)
	for i := range tfr.vectors {
		total += tfr.vectors[i].Size()
	}
	return total
}

// Term returns the row's term bytes (not a copy).
func (tfr *TermFrequencyRow) Term() []byte {
	return tfr.term
}

// Freq returns the row's term frequency.
func (tfr *TermFrequencyRow) Freq() uint64 {
	return tfr.freq
}
// ScanPrefixForField returns the key prefix shared by all term frequency
// rows of this row's field: the type byte 't' and the little-endian field.
func (tfr *TermFrequencyRow) ScanPrefixForField() []byte {
	prefix := []byte{'t', 0, 0}
	binary.LittleEndian.PutUint16(prefix[1:], tfr.field)
	return prefix
}

// ScanPrefixForFieldTermPrefix returns the field prefix followed by the
// term bytes, without a terminating separator, so it also matches longer
// terms that begin with this term.
func (tfr *TermFrequencyRow) ScanPrefixForFieldTermPrefix() []byte {
	prefix := make([]byte, 3, 3+len(tfr.term))
	prefix[0] = 't'
	binary.LittleEndian.PutUint16(prefix[1:3], tfr.field)
	return append(prefix, tfr.term...)
}

// ScanPrefixForFieldTerm returns the field prefix followed by the term and
// the ByteSeparator that terminates the term in a full key.
func (tfr *TermFrequencyRow) ScanPrefixForFieldTerm() []byte {
	prefix := make([]byte, 3, 3+len(tfr.term)+1)
	prefix[0] = 't'
	binary.LittleEndian.PutUint16(prefix[1:3], tfr.field)
	prefix = append(prefix, tfr.term...)
	return append(prefix, ByteSeparator)
}
// Key returns the serialized key in a newly allocated slice.
func (tfr *TermFrequencyRow) Key() []byte {
	key := make([]byte, tfr.KeySize())
	n, _ := tfr.KeyTo(key) // KeyTo never returns an error
	return key[:n]
}

// KeySize returns the serialized key length.
func (tfr *TermFrequencyRow) KeySize() int {
	return termFrequencyRowKeySize(tfr.term, tfr.doc)
}

// termFrequencyRowKeySize returns the key length for term/doc: three header
// bytes, the term, one separator byte and the doc id.
func termFrequencyRowKeySize(term, doc []byte) int {
	const header, separator = 3, 1
	return header + len(term) + separator + len(doc)
}

// KeyTo writes the serialized key into buf and returns the number of bytes
// written.
func (tfr *TermFrequencyRow) KeyTo(buf []byte) (int, error) {
	return termFrequencyRowKeyTo(buf, tfr.field, tfr.term, tfr.doc), nil
}

// termFrequencyRowKeyTo writes 't', the little-endian field, the term,
// ByteSeparator and the doc id into buf, returning the bytes written.
func termFrequencyRowKeyTo(buf []byte, field uint16, term, doc []byte) int {
	buf[0] = 't'
	binary.LittleEndian.PutUint16(buf[1:3], field)
	n := 3
	n += copy(buf[n:], term)
	buf[n] = ByteSeparator
	n++
	n += copy(buf[n:], doc)
	return n
}

// KeyAppendTo serializes the key into buf when its capacity suffices and
// into a newly allocated slice otherwise. Despite the name, the key is
// written from offset zero; existing contents of buf are overwritten.
func (tfr *TermFrequencyRow) KeyAppendTo(buf []byte) ([]byte, error) {
	need := tfr.KeySize()
	if need > cap(buf) {
		buf = make([]byte, need)
	}
	n, err := tfr.KeyTo(buf[:need])
	return buf[:n], err
}
// DictionaryRowKey returns the serialized key of the dictionary row that
// corresponds to this row's term and field.
func (tfr *TermFrequencyRow) DictionaryRowKey() []byte {
	return NewDictionaryRow(tfr.term, tfr.field, 0).Key()
}

// DictionaryRowKeySize returns the length of the corresponding dictionary
// row key.
func (tfr *TermFrequencyRow) DictionaryRowKeySize() int {
	return NewDictionaryRow(tfr.term, tfr.field, 0).KeySize()
}

// DictionaryRowKeyTo writes the corresponding dictionary row key into buf
// and returns the number of bytes written.
func (tfr *TermFrequencyRow) DictionaryRowKeyTo(buf []byte) (int, error) {
	return NewDictionaryRow(tfr.term, tfr.field, 0).KeyTo(buf)
}
// Value returns the serialized value in a newly allocated slice.
func (tfr *TermFrequencyRow) Value() []byte {
	val := make([]byte, tfr.ValueSize())
	n, _ := tfr.ValueTo(val) // ValueTo never returns an error
	return val[:n]
}

// ValueSize returns an upper bound on the serialized value length: every
// encoded number is budgeted at binary.MaxVarintLen64 bytes. That covers
// freq and norm, plus per vector the five fixed numbers (field, pos, start,
// end, array position count) and one number per array position.
func (tfr *TermFrequencyRow) ValueSize() int {
	total := 2 * binary.MaxVarintLen64
	for _, tv := range tfr.vectors {
		total += (5 + len(tv.arrayPositions)) * binary.MaxVarintLen64
	}
	return total
}

// ValueTo writes the serialized value into buf and returns the number of
// bytes written. The layout is a sequence of uvarints: freq, the IEEE-754
// bits of norm, then for every vector field, pos, start, end, the number of
// array positions and the array positions themselves. Each write slices a
// full MaxVarintLen64 window, so buf should be sized with ValueSize.
func (tfr *TermFrequencyRow) ValueTo(buf []byte) (int, error) {
	n := binary.PutUvarint(buf[:binary.MaxVarintLen64], tfr.freq)
	n += binary.PutUvarint(buf[n:n+binary.MaxVarintLen64], uint64(math.Float32bits(tfr.norm)))

	for _, tv := range tfr.vectors {
		fixed := [...]uint64{
			uint64(tv.field),
			tv.pos,
			tv.start,
			tv.end,
			uint64(len(tv.arrayPositions)),
		}
		for _, x := range fixed {
			n += binary.PutUvarint(buf[n:n+binary.MaxVarintLen64], x)
		}
		for _, ap := range tv.arrayPositions {
			n += binary.PutUvarint(buf[n:n+binary.MaxVarintLen64], ap)
		}
	}
	return n, nil
}
// String renders the row for debugging.
func (tfr *TermFrequencyRow) String() string {
	return fmt.Sprintf(
		"Term: `%s` Field: %d DocId: `%s` Frequency: %d Norm: %f Vectors: %v",
		string(tfr.term), tfr.field, string(tfr.doc), tfr.freq, tfr.norm, tfr.vectors,
	)
}

// InitTermFrequencyRow overwrites term, field, doc, freq and norm of an
// existing row and returns that same row. The vectors are left untouched
// and the slices are retained, not copied.
func InitTermFrequencyRow(tfr *TermFrequencyRow, term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
	tfr.term, tfr.doc = term, docID
	tfr.field = field
	tfr.freq, tfr.norm = freq, norm
	return tfr
}

// NewTermFrequencyRow allocates a row without term vectors.
func NewTermFrequencyRow(term []byte, field uint16, docID []byte, freq uint64, norm float32) *TermFrequencyRow {
	return InitTermFrequencyRow(&TermFrequencyRow{}, term, field, docID, freq, norm)
}

// NewTermFrequencyRowWithTermVectors allocates a row carrying the given
// term vectors.
func NewTermFrequencyRowWithTermVectors(term []byte, field uint16, docID []byte, freq uint64, norm float32, vectors []*TermVector) *TermFrequencyRow {
	row := NewTermFrequencyRow(term, field, docID, freq, norm)
	row.vectors = vectors
	return row
}
// NewTermFrequencyRowK decodes only the key portion of a term frequency
// row.
func NewTermFrequencyRowK(key []byte) (*TermFrequencyRow, error) {
	row := &TermFrequencyRow{}
	if err := row.parseK(key); err != nil {
		return nil, err
	}
	return row, nil
}

// parseK fills field, term and doc from a serialized key laid out as
// 't', little-endian field, term, ByteSeparator, doc id. The term and doc
// slices alias key; nothing is copied. It fails when the key is shorter
// than the three-byte header, has no separator after the term, or has an
// empty doc id.
func (tfr *TermFrequencyRow) parseK(key []byte) error {
	if len(key) < 3 {
		return fmt.Errorf("invalid term frequency key, no valid field")
	}
	tfr.field = binary.LittleEndian.Uint16(key[1:3])

	rest := key[3:]
	sep := bytes.IndexByte(rest, ByteSeparator)
	if sep < 0 {
		return fmt.Errorf("invalid term frequency key, no byte separator terminating term")
	}
	tfr.term = rest[:sep]

	doc := rest[sep+1:]
	if len(doc) < 1 {
		return fmt.Errorf("invalid term frequency key, empty docid")
	}
	tfr.doc = doc
	return nil
}
// parseKDoc extracts only the doc id from a serialized key whose term is
// already known, skipping the three header bytes, the term and the
// separator. The doc slice aliases key. A key too short to contain the
// header, term and separator is rejected with an error instead of causing
// an out-of-range panic; an empty doc id is rejected as before.
func (tfr *TermFrequencyRow) parseKDoc(key []byte, term []byte) error {
	docStart := 3 + len(term) + 1
	if len(key) < docStart {
		return fmt.Errorf("invalid term frequency key, too short for term")
	}
	tfr.doc = key[docStart:]
	if len(tfr.doc) == 0 {
		return fmt.Errorf("invalid term frequency key, empty docid")
	}
	return nil
}
// parseV fills freq, norm and (optionally) the term vectors from a
// serialized value. The value is a sequence of uvarints: freq, the IEEE-754
// bits of norm, then zero or more vectors, each made of field, pos, start,
// end, the number of array positions and the array positions themselves.
//
// Any existing vectors are discarded. When includeTermVectors is false,
// parsing stops after norm and vectors stays nil. An error is returned when
// a number is missing, truncated or overflows, or when a vector declares
// more array positions than there are bytes left in the value.
func (tfr *TermFrequencyRow) parseV(value []byte, includeTermVectors bool) error {
	var bytesRead int
	tfr.freq, bytesRead = binary.Uvarint(value)
	if bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, invalid frequency")
	}
	currOffset := bytesRead

	var norm uint64
	norm, bytesRead = binary.Uvarint(value[currOffset:])
	if bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, no norm")
	}
	currOffset += bytesRead

	tfr.norm = math.Float32frombits(uint32(norm))

	tfr.vectors = nil
	if !includeTermVectors {
		return nil
	}

	var field uint64
	field, bytesRead = binary.Uvarint(value[currOffset:])
	for bytesRead > 0 {
		currOffset += bytesRead
		tv := TermVector{}
		tv.field = uint16(field)
		// at this point we expect at least one term vector
		if tfr.vectors == nil {
			tfr.vectors = make([]*TermVector, 0)
		}

		tv.pos, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no position")
		}
		currOffset += bytesRead

		tv.start, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no start")
		}
		currOffset += bytesRead

		tv.end, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no end")
		}
		currOffset += bytesRead

		var arrayPositionsLen uint64
		arrayPositionsLen, bytesRead = binary.Uvarint(value[currOffset:])
		if bytesRead <= 0 {
			return fmt.Errorf("invalid term frequency value, vector contains no arrayPositionLen")
		}
		currOffset += bytesRead

		if arrayPositionsLen > 0 {
			// Every array position occupies at least one byte, so a count
			// larger than the remaining input can only come from corrupt
			// data. Reject it before allocating: the count is read from
			// storage and could otherwise request an enormous slice.
			if remaining := uint64(len(value) - currOffset); arrayPositionsLen > remaining {
				return fmt.Errorf("invalid term frequency value, vector declares %d arrayPositions but only %d bytes remain", arrayPositionsLen, remaining)
			}
			tv.arrayPositions = make([]uint64, arrayPositionsLen)
			for i := range tv.arrayPositions {
				tv.arrayPositions[i], bytesRead = binary.Uvarint(value[currOffset:])
				if bytesRead <= 0 {
					return fmt.Errorf("invalid term frequency value, vector contains no arrayPosition of index %d", i)
				}
				currOffset += bytesRead
			}
		}

		tfr.vectors = append(tfr.vectors, &tv)
		// try to read next record (may not exist)
		field, bytesRead = binary.Uvarint(value[currOffset:])
	}
	// leftover bytes that do not form a complete field number are an error
	if len(value[currOffset:]) > 0 && bytesRead <= 0 {
		return fmt.Errorf("invalid term frequency value, vector field invalid")
	}

	return nil
}
// NewTermFrequencyRowKV decodes a term frequency row, including its term
// vectors, from a serialized key and value.
func NewTermFrequencyRowKV(key, value []byte) (*TermFrequencyRow, error) {
	row, err := NewTermFrequencyRowK(key)
	if err != nil {
		return nil, err
	}
	if err := row.parseV(value, true); err != nil {
		return nil, err
	}
	return row, nil
}
// BackIndexRow records, for one document, the terms indexed per field and
// the stored field entries.
type BackIndexRow struct {
	doc           []byte
	termsEntries  []*BackIndexTermsEntry
	storedEntries []*BackIndexStoreEntry
}

// AllTermKeys returns the serialized term frequency row key of every term
// recorded in the back index, in entry order. A nil receiver yields nil.
func (br *BackIndexRow) AllTermKeys() [][]byte {
	if br == nil {
		return nil
	}
	// count the terms first so the result is allocated exactly once
	total := 0
	for _, termsEntry := range br.termsEntries {
		total += len(termsEntry.Terms)
	}
	rv := make([][]byte, 0, total)
	for _, termsEntry := range br.termsEntries {
		for i := range termsEntry.Terms {
			termRow := NewTermFrequencyRow([]byte(termsEntry.Terms[i]), uint16(termsEntry.GetField()), br.doc, 0, 0)
			rv = append(rv, termRow.Key())
		}
	}
	return rv
}
// AllStoredKeys returns the serialized stored row key of every stored entry
// recorded in the back index, in entry order. A nil receiver yields nil.
func (br *BackIndexRow) AllStoredKeys() [][]byte {
	if br == nil {
		return nil
	}
	keys := make([][]byte, 0, len(br.storedEntries))
	for _, entry := range br.storedEntries {
		// type and value do not take part in the key; placeholders suffice
		row := NewStoredRow(br.doc, uint16(entry.GetField()), entry.GetArrayPositions(), 'x', []byte{})
		keys = append(keys, row.Key())
	}
	return keys
}
// Key returns the serialized key in a newly allocated slice.
func (br *BackIndexRow) Key() []byte {
	key := make([]byte, br.KeySize())
	n, _ := br.KeyTo(key) // KeyTo never returns an error
	return key[:n]
}

// KeySize returns the serialized key length: the type byte plus the doc id.
func (br *BackIndexRow) KeySize() int {
	return 1 + len(br.doc)
}

// KeyTo writes the type byte 'b' followed by the doc id into buf and
// returns the number of bytes written.
func (br *BackIndexRow) KeyTo(buf []byte) (int, error) {
	buf[0] = 'b'
	n := 1 + copy(buf[1:], br.doc)
	return n, nil
}

// Value returns the serialized value in a newly allocated slice. A marshal
// error is ignored here; use ValueTo to observe it.
func (br *BackIndexRow) Value() []byte {
	val := make([]byte, br.ValueSize())
	n, _ := br.ValueTo(val)
	return val[:n]
}

// ValueSize returns the encoded size of the BackIndexRowValue message built
// from the row's entries.
func (br *BackIndexRow) ValueSize() int {
	msg := BackIndexRowValue{
		TermsEntries:  br.termsEntries,
		StoredEntries: br.storedEntries,
	}
	return (&msg).Size()
}

// ValueTo marshals the row's entries as a BackIndexRowValue message into
// buf and returns the number of bytes written.
func (br *BackIndexRow) ValueTo(buf []byte) (int, error) {
	msg := BackIndexRowValue{
		TermsEntries:  br.termsEntries,
		StoredEntries: br.storedEntries,
	}
	return (&msg).MarshalTo(buf)
}

// String renders the row for debugging.
func (br *BackIndexRow) String() string {
	return fmt.Sprintf(
		"Backindex DocId: `%s` Terms Entries: %v, Stored Entries: %v",
		string(br.doc), br.termsEntries, br.storedEntries,
	)
}

// NewBackIndexRow builds a BackIndexRow. All arguments are retained, not
// copied.
func NewBackIndexRow(docID []byte, entries []*BackIndexTermsEntry, storedFields []*BackIndexStoreEntry) *BackIndexRow {
	row := new(BackIndexRow)
	row.doc = docID
	row.termsEntries = entries
	row.storedEntries = storedFields
	return row
}
// NewBackIndexRowKV decodes a BackIndexRow from a serialized key and value.
// The doc id is everything after the type byte, up to a ByteSeparator if
// the key contains one (the separator is dropped) or otherwise to the end
// of the key. A key with nothing after the type byte is rejected. The value
// is unmarshalled as a BackIndexRowValue message.
func NewBackIndexRowKV(key, value []byte) (*BackIndexRow, error) {
	keyBuf := bytes.NewBuffer(key)
	// skip the leading type byte
	if _, err := keyBuf.ReadByte(); err != nil {
		return nil, err
	}

	doc, err := keyBuf.ReadBytes(ByteSeparator)
	switch {
	case err == nil:
		// separator found; it is part of what was read, so strip it
		doc = doc[:len(doc)-1]
	case err != io.EOF:
		return nil, err
	case len(doc) < 1:
		// end of key reached without reading a single doc id byte
		return nil, fmt.Errorf("invalid doc length 0 - % x", key)
	}

	var parsed BackIndexRowValue
	if err := proto.Unmarshal(value, &parsed); err != nil {
		return nil, err
	}

	return &BackIndexRow{
		doc:           doc,
		termsEntries:  parsed.TermsEntries,
		storedEntries: parsed.StoredEntries,
	}, nil
}
// STORED

// StoredRow holds the stored value of one field (optionally at specific
// array positions) of a document. The key is the type byte 's', the doc id,
// ByteSeparator, the little-endian uint16 field and the array positions as
// uvarints; the value is a type byte followed by the raw value bytes.
type StoredRow struct {
	doc            []byte
	field          uint16
	arrayPositions []uint64
	typ            byte
	value          []byte
}

// Key returns the serialized key in a newly allocated slice.
func (s *StoredRow) Key() []byte {
	buf := make([]byte, s.KeySize())
	size, _ := s.KeyTo(buf) // KeyTo never returns an error
	return buf[0:size]
}

// KeySize returns an upper bound on the serialized key length; every array
// position is budgeted at binary.MaxVarintLen64 bytes.
func (s *StoredRow) KeySize() int {
	return 1 + len(s.doc) + 1 + 2 + (binary.MaxVarintLen64 * len(s.arrayPositions))
}

// KeyTo writes the serialized key into buf and returns the number of bytes
// actually written, which may be less than KeySize.
func (s *StoredRow) KeyTo(buf []byte) (int, error) {
	docLen := len(s.doc)
	buf[0] = 's'
	copy(buf[1:], s.doc)
	buf[1+docLen] = ByteSeparator
	binary.LittleEndian.PutUint16(buf[1+docLen+1:], s.field)
	bytesUsed := 1 + docLen + 1 + 2
	for _, arrayPosition := range s.arrayPositions {
		bytesUsed += binary.PutUvarint(buf[bytesUsed:], arrayPosition)
	}
	return bytesUsed, nil
}

// Value returns the serialized value in a newly allocated slice.
func (s *StoredRow) Value() []byte {
	buf := make([]byte, s.ValueSize())
	size, _ := s.ValueTo(buf) // ValueTo never returns an error
	return buf[:size]
}

// ValueSize returns the serialized value length: the type byte plus the
// value bytes.
func (s *StoredRow) ValueSize() int {
	return len(s.value) + 1
}

// ValueTo writes the type byte and the value bytes into buf and returns the
// number of bytes written.
func (s *StoredRow) ValueTo(buf []byte) (int, error) {
	buf[0] = s.typ
	used := copy(buf[1:], s.value)
	return used + 1, nil
}

// String renders the row for debugging.
func (s *StoredRow) String() string {
	return fmt.Sprintf("Document: %s Field %d, Array Positions: %v, Type: %s Value: %s", s.doc, s.field, s.arrayPositions, string(s.typ), s.value)
}

// ScanPrefixForDoc returns the key prefix shared by all stored rows of this
// row's document: 's', the doc id and ByteSeparator.
func (s *StoredRow) ScanPrefixForDoc() []byte {
	docLen := len(s.doc)
	buf := make([]byte, 1+docLen+1)
	buf[0] = 's'
	copy(buf[1:], s.doc)
	buf[1+docLen] = ByteSeparator
	return buf
}

// NewStoredRow builds a StoredRow. The slices are retained, not copied.
func NewStoredRow(docID []byte, field uint16, arrayPositions []uint64, typ byte, value []byte) *StoredRow {
	return &StoredRow{
		doc:            docID,
		field:          field,
		arrayPositions: arrayPositions,
		typ:            typ,
		value:          value,
	}
}

// NewStoredRowK decodes only the key portion of a StoredRow. It fails when
// the key has no ByteSeparator terminating the doc id, when the doc id is
// empty, or when the field bytes are missing. Array positions are read as
// uvarints until the first read error (normally the end of the key); that
// error is not reported, and the result is always a non-nil slice.
func NewStoredRowK(key []byte) (*StoredRow, error) {
	rv := StoredRow{}

	buf := bytes.NewBuffer(key)
	_, err := buf.ReadByte() // type
	if err != nil {
		return nil, err
	}

	rv.doc, err = buf.ReadBytes(ByteSeparator)
	if err != nil {
		return nil, err
	}
	if len(rv.doc) < 2 { // 1 for min doc id length, 1 for separator
		err = fmt.Errorf("invalid doc length 0")
		return nil, err
	}

	rv.doc = rv.doc[:len(rv.doc)-1] // trim off separator byte

	err = binary.Read(buf, binary.LittleEndian, &rv.field)
	if err != nil {
		return nil, err
	}

	rv.arrayPositions = make([]uint64, 0)
	for {
		nextArrayPos, readErr := binary.ReadUvarint(buf)
		if readErr != nil {
			break
		}
		rv.arrayPositions = append(rv.arrayPositions, nextArrayPos)
	}
	return &rv, nil
}

// NewStoredRowKV decodes a StoredRow from a serialized key and value. The
// first value byte is the type; the remainder aliases value. An empty value
// is rejected with an error instead of causing an out-of-range panic.
func NewStoredRowKV(key, value []byte) (*StoredRow, error) {
	rv, err := NewStoredRowK(key)
	if err != nil {
		return nil, err
	}
	if len(value) < 1 {
		return nil, fmt.Errorf("invalid stored row value, missing type byte")
	}
	rv.typ = value[0]
	rv.value = value[1:]
	return rv, nil
}
// backIndexFieldTermVisitor is the callback used by visitBackIndexRow and
// visitBackIndexRowFieldTerms. It is invoked once per term with the field
// number most recently decoded in the same terms entry. The term slice
// aliases the buffer being parsed, so it must be copied if it is retained.
type backIndexFieldTermVisitor func(field uint32, term []byte)
// visitBackIndexRow is designed to process a protobuf encoded
// value, without creating unnecessary garbage. Instead values are passed
// to a callback, inspected first, and only copied if necessary.
// Due to the fact that this borrows from generated code, it must be manually
// updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexRowValue) Unmarshal(data []byte) error
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
//
// Terms entries (field 1) are handed to visitBackIndexRowFieldTerms, stored
// entries (field 2) are skipped, and unknown fields are skipped via
// skipUpsidedown. A length prefix that is negative, or so large that the
// computed end index overflows, yields ErrInvalidLengthUpsidedown; one that
// runs past the end of data yields io.ErrUnexpectedEOF.
func visitBackIndexRow(data []byte, callback backIndexFieldTermVisitor) error {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field TermsEntries", wireType)
			}
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + msglen
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			// a huge positive msglen can wrap the sum around to a negative
			// index, which would slip past the bound check below
			if postIndex < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			// dont parse term entries
			// m.TermsEntries = append(m.TermsEntries, &BackIndexTermsEntry{})
			// if err := m.TermsEntries[len(m.TermsEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
			// 	return err
			// }
			// instead, inspect them
			if err := visitBackIndexRowFieldTerms(data[iNdEx:postIndex], callback); err != nil {
				return err
			}
			iNdEx = postIndex
		case 2:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field StoredEntries", wireType)
			}
			var msglen int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				msglen |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			postIndex := iNdEx + msglen
			if msglen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			// guard against overflow of iNdEx + msglen (see case 1)
			if postIndex < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			// don't parse stored entries
			// m.StoredEntries = append(m.StoredEntries, &BackIndexStoreEntry{})
			// if err := m.StoredEntries[len(m.StoredEntries)-1].Unmarshal(data[iNdEx:postIndex]); err != nil {
			// 	return err
			// }
			iNdEx = postIndex
		default:
			// rewind to the start of the tag so skipUpsidedown sees the
			// whole field
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			// don't track unrecognized data
			// m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}

	return nil
}
// visitBackIndexRowFieldTerms is designed to process a protobuf encoded
// sub-value within the BackIndexRowValue, without creating unnecessary garbage.
// Instead values are passed to a callback, inspected first, and only copied if
// necessary. Due to the fact that this borrows from generated code, it must
// be manually updated if the protobuf definition changes.
//
// This code originates from:
// func (m *BackIndexTermsEntry) Unmarshal(data []byte) error {
// the sections which create garbage or parse uninteresting sections
// have been commented out. This was done by design to allow for easier
// merging in the future if that original function is regenerated
//
// The callback receives each term (field 2) together with the most recently
// decoded field number (field 1); the term slice aliases data. A term length
// that is negative when converted to int, or that overflows the computed end
// index, yields ErrInvalidLengthUpsidedown; one that runs past the end of
// data yields io.ErrUnexpectedEOF.
func visitBackIndexRowFieldTerms(data []byte, callback backIndexFieldTermVisitor) error {
	var theField uint32

	var hasFields [1]uint64
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		fieldNum := int32(wire >> 3)
		wireType := int(wire & 0x7)
		switch fieldNum {
		case 1:
			if wireType != 0 {
				return fmt.Errorf("proto: wrong wireType = %d for field Field", wireType)
			}
			var v uint32
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				v |= (uint32(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			// m.Field = &v
			theField = v
			hasFields[0] |= uint64(0x00000001)
		case 2:
			if wireType != 2 {
				return fmt.Errorf("proto: wrong wireType = %d for field Terms", wireType)
			}
			var stringLen uint64
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				stringLen |= (uint64(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			// a length with the top bit set becomes negative as an int and
			// would move postIndex backwards past the bound check below
			intStringLen := int(stringLen)
			if intStringLen < 0 {
				return ErrInvalidLengthUpsidedown
			}
			postIndex := iNdEx + intStringLen
			if postIndex < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if postIndex > l {
				return io.ErrUnexpectedEOF
			}
			// m.Terms = append(m.Terms, string(data[iNdEx:postIndex]))
			callback(theField, data[iNdEx:postIndex])
			iNdEx = postIndex
		default:
			// rewind to the start of the tag so skipUpsidedown sees the
			// whole field
			var sizeOfWire int
			for {
				sizeOfWire++
				wire >>= 7
				if wire == 0 {
					break
				}
			}
			iNdEx -= sizeOfWire
			skippy, err := skipUpsidedown(data[iNdEx:])
			if err != nil {
				return err
			}
			if skippy < 0 {
				return ErrInvalidLengthUpsidedown
			}
			if (iNdEx + skippy) > l {
				return io.ErrUnexpectedEOF
			}
			// m.XXX_unrecognized = append(m.XXX_unrecognized, data[iNdEx:iNdEx+skippy]...)
			iNdEx += skippy
		}
	}
	// if hasFields[0]&uint64(0x00000001) == 0 {
	// 	return new(github_com_golang_protobuf_proto.RequiredNotSetError)
	// }

	return nil
}
================================================
FILE: index/upsidedown/row_merge.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"encoding/binary"
)
var (
	// mergeOperator is the package's merge operator value.
	mergeOperator upsideDownMerge

	// dictionaryTermIncr and dictionaryTermDecr are the merge operands for
	// +1 and -1: a signed 64-bit delta stored as 8 little-endian bytes.
	dictionaryTermIncr []byte
	dictionaryTermDecr []byte
)

func init() {
	// encode serializes a signed delta in the operand format
	encode := func(delta int64) []byte {
		operand := make([]byte, 8)
		binary.LittleEndian.PutUint64(operand, uint64(delta))
		return operand
	}
	dictionaryTermIncr = encode(1)
	dictionaryTermDecr = encode(-1)
}
// upsideDownMerge merges dictionary row counts. Operands are signed 64-bit
// deltas stored as 8 little-endian bytes.
type upsideDownMerge struct{}

// FullMerge applies all operands, in order, to the count stored in
// existingValue (or to zero when there is no existing value) and returns
// the re-encoded dictionary row value. A delta that would take the count
// below zero clamps it to zero. It reports false when the key or existing
// value cannot be parsed, or when an operand is shorter than 8 bytes.
func (m *upsideDownMerge) FullMerge(key, existingValue []byte, operands [][]byte) ([]byte, bool) {
	// set up record based on key
	dr, err := NewDictionaryRowK(key)
	if err != nil {
		return nil, false
	}
	if len(existingValue) > 0 {
		// if existing value, parse it
		err = dr.parseDictionaryV(existingValue)
		if err != nil {
			return nil, false
		}
	}

	// now process operands
	for _, operand := range operands {
		if len(operand) < 8 {
			// malformed operand: fail the merge instead of panicking
			return nil, false
		}
		next := int64(binary.LittleEndian.Uint64(operand))
		switch {
		case next >= 0:
			dr.count += uint64(next)
		case uint64(-next) > dr.count:
			// subtracting next from existing would underflow
			dr.count = 0
		default:
			dr.count -= uint64(-next)
		}
	}

	return dr.Value(), true
}

// PartialMerge combines two operands into a single operand holding the sum
// of their deltas. It reports false when either operand is shorter than
// 8 bytes.
func (m *upsideDownMerge) PartialMerge(key, leftOperand, rightOperand []byte) ([]byte, bool) {
	if len(leftOperand) < 8 || len(rightOperand) < 8 {
		// malformed operand: fail the merge instead of panicking
		return nil, false
	}
	left := int64(binary.LittleEndian.Uint64(leftOperand))
	right := int64(binary.LittleEndian.Uint64(rightOperand))
	rv := make([]byte, 8)
	binary.LittleEndian.PutUint64(rv, uint64(left+right))
	return rv, true
}

// Name returns the name of the merge operator.
func (m *upsideDownMerge) Name() string {
	return "upsideDownMerge"
}
================================================
FILE: index/upsidedown/row_merge_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"bytes"
"encoding/binary"
"testing"
)
// TestPartialMerge folds a list of operands pairwise through PartialMerge
// and checks the accumulated count.
func TestPartialMerge(t *testing.T) {
	type mergeCase struct {
		in  [][]byte
		out uint64
	}
	cases := []mergeCase{
		{
			in:  [][]byte{dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr, dictionaryTermIncr},
			out: 5,
		},
	}

	merger := &upsideDownMerge{}
	for _, tc := range cases {
		acc := tc.in[0]
		for _, operand := range tc.in[1:] {
			merged, ok := merger.PartialMerge([]byte("key"), acc, operand)
			if !ok {
				t.Errorf("expected partial merge ok")
			}
			acc = merged
		}
		if got := decodeCount(acc); got != tc.out {
			t.Errorf("expected %d, got %d", tc.out, got)
		}
	}
}
// decodeCount decodes a merge operand, i.e. a 64-bit count stored as
// 8 little-endian bytes (the format produced by PartialMerge). Input that
// is too short decodes as zero.
func decodeCount(in []byte) uint64 {
	var count uint64
	// a read error leaves count at zero, which is the desired fallback
	_ = binary.Read(bytes.NewReader(in), binary.LittleEndian, &count)
	return count
}
================================================
FILE: index/upsidedown/row_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"math"
"reflect"
"testing"
"google.golang.org/protobuf/proto"
)
// TestRows checks, for every row kind, that a row serializes to the expected
// key and value bytes and that parsing those bytes yields an equal row.
func TestRows(t *testing.T) {
	tests := []struct {
		input  UpsideDownCouchRow
		outKey []byte
		outVal []byte
	}{
		{
			NewVersionRow(1),
			[]byte{'v'},
			[]byte{0x1},
		},
		{
			NewFieldRow(0, "name"),
			[]byte{'f', 0, 0},
			[]byte{'n', 'a', 'm', 'e', ByteSeparator},
		},
		{
			NewFieldRow(1, "desc"),
			[]byte{'f', 1, 0},
			[]byte{'d', 'e', 's', 'c', ByteSeparator},
		},
		{
			NewFieldRow(513, "style"),
			[]byte{'f', 1, 2},
			[]byte{'s', 't', 'y', 'l', 'e', ByteSeparator},
		},
		{
			NewDictionaryRow([]byte{'b', 'e', 'e', 'r'}, 0, 27),
			[]byte{'d', 0, 0, 'b', 'e', 'e', 'r'},
			[]byte{27},
		},
		{
			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("catz"), 3, 3.14),
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'c', 'a', 't', 'z'},
			[]byte{3, 195, 235, 163, 130, 4},
		},
		{
			NewTermFrequencyRow([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14),
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 195, 235, 163, 130, 4},
		},
		{
			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 3, 3.14, []*TermVector{{field: 0, pos: 1, start: 3, end: 11}, {field: 0, pos: 2, start: 23, end: 31}, {field: 0, pos: 3, start: 43, end: 51}}),
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 0, 2, 23, 31, 0, 0, 3, 43, 51, 0},
		},
		// test larger varints
		{
			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{{field: 255, pos: 1, start: 3, end: 11}, {field: 0, pos: 2198, start: 23, end: 31}, {field: 0, pos: 3, start: 43, end: 51}}),
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 0, 0, 150, 17, 23, 31, 0, 0, 3, 43, 51, 0},
		},
		// test vectors with arrayPositions
		{
			NewTermFrequencyRowWithTermVectors([]byte{'b', 'e', 'e', 'r'}, 0, []byte("budweiser"), 25896, 3.14, []*TermVector{{field: 255, pos: 1, start: 3, end: 11, arrayPositions: []uint64{0}}, {field: 0, pos: 2198, start: 23, end: 31, arrayPositions: []uint64{1, 2}}, {field: 0, pos: 3, start: 43, end: 51, arrayPositions: []uint64{3, 4, 5}}}),
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{168, 202, 1, 195, 235, 163, 130, 4, 255, 1, 1, 3, 11, 1, 0, 0, 150, 17, 23, 31, 2, 1, 2, 0, 3, 43, 51, 3, 3, 4, 5},
		},
		{
			NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}}, nil),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r'},
		},
		{
			NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}, {Field: proto.Uint32(1), Terms: []string{"beat"}}}, nil),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't'},
		},
		{
			NewBackIndexRow([]byte("budweiser"), []*BackIndexTermsEntry{{Field: proto.Uint32(0), Terms: []string{"beer"}}, {Field: proto.Uint32(1), Terms: []string{"beat"}}}, []*BackIndexStoreEntry{{Field: proto.Uint32(3)}, {Field: proto.Uint32(4)}, {Field: proto.Uint32(5)}}),
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5},
		},
		{
			NewStoredRow([]byte("budweiser"), 0, []uint64{}, byte('t'), []byte("an american beer")),
			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0},
			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
		},
		{
			NewStoredRow([]byte("budweiser"), 0, []uint64{2, 294, 3078}, byte('t'), []byte("an american beer")),
			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0, 2, 166, 2, 134, 24},
			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
		},
		{
			NewInternalRow([]byte("mapping"), []byte(`{"mapping":"json content"}`)),
			[]byte{'i', 'm', 'a', 'p', 'p', 'i', 'n', 'g'},
			[]byte{'{', '"', 'm', 'a', 'p', 'p', 'i', 'n', 'g', '"', ':', '"', 'j', 's', 'o', 'n', ' ', 'c', 'o', 'n', 't', 'e', 'n', 't', '"', '}'},
		},
	}

	// test going from struct to k/v bytes
	for i, test := range tests {
		rk := test.input.Key()
		if !reflect.DeepEqual(rk, test.outKey) {
			t.Errorf("Expected key to be %v got: %v for %d", test.outKey, rk, i)
		}
		rv := test.input.Value()
		if !reflect.DeepEqual(rv, test.outVal) {
			t.Errorf("Expected value to be %v got: %v for %d", test.outVal, rv, i)
		}
	}

	// now test going back from k/v bytes to struct
	for i, test := range tests {
		row, err := ParseFromKeyValue(test.outKey, test.outVal)
		if err != nil {
			t.Errorf("error parsing key/value: %v for %d", err, i)
			// the row is meaningless after an error; skip the comparison
			continue
		}
		if !reflect.DeepEqual(row, test.input) {
			t.Errorf("Expected: %#v got: %#v for %d", test.input, row, i)
		}
	}
}
// TestInvalidRows feeds malformed key/value pairs to ParseFromKeyValue and
// expects every one of them to be rejected with an error.
func TestInvalidRows(t *testing.T) {
	tests := []struct {
		key []byte
		val []byte
	}{
		// empty key
		{
			[]byte{},
			[]byte{},
		},
		// no such type q
		{
			[]byte{'q'},
			[]byte{},
		},
		// type v, invalid empty value
		{
			[]byte{'v'},
			[]byte{},
		},
		// type f, invalid key
		{
			[]byte{'f'},
			[]byte{},
		},
		// type f, valid key, invalid value
		{
			[]byte{'f', 0, 0},
			[]byte{},
		},
		// type t, invalid key (missing field)
		{
			[]byte{'t'},
			[]byte{},
		},
		// type t, invalid key (missing term)
		{
			[]byte{'t', 0, 0},
			[]byte{},
		},
		// type t, invalid key (missing id)
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator},
			[]byte{},
		},
		// type t, invalid val (missing freq)
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{},
		},
		// type t, invalid val (missing norm)
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3},
		},
		// type t, invalid val (half missing tv field, full missing is valid (no term vectors))
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 25, 255},
		},
		// type t, invalid val (missing tv pos)
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 25, 0},
		},
		// type t, invalid val (missing tv start)
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 25, 0, 0},
		},
		// type t, invalid val (missing tv end)
		{
			[]byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{3, 25, 0, 0, 0},
		},
		// type b, invalid key (missing id)
		{
			[]byte{'b'},
			[]byte{'b', 'e', 'e', 'r', ByteSeparator, 0, 0},
		},
		// type b, invalid val (missing field)
		{
			[]byte{'b', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'},
			[]byte{'g', 'a', 'r', 'b', 'a', 'g', 'e'},
		},
		// type s, invalid key (missing id)
		{
			[]byte{'s'},
			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
		},
		// type s, invalid key (missing field)
		{
			[]byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator},
			[]byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'},
		},
	}

	for i, test := range tests {
		_, err := ParseFromKeyValue(test.key, test.val)
		if err == nil {
			// identify the offending case so a failure can be traced to its table entry
			t.Errorf("expected error for test %d (key %v, val %v), got nil", i, test.key, test.val)
		}
	}
}
// TestDictionaryRowValueBug197 is a regression test: encoding the value of a
// DictionaryRow holding a very large count must not panic.
func TestDictionaryRowValueBug197(t *testing.T) {
	counts := []uint64{
		72057594037927936, // the smallest value that would trigger a crash
		math.MaxUint64,    // the maximum possible value
	}
	for _, c := range counts {
		row := &DictionaryRow{
			field: 0,
			term:  []byte("marty"),
			count: c,
		}
		// the test passes as long as Value returns without panicking
		row.Value()
	}
}
func BenchmarkTermFrequencyRowEncode(b *testing.B) {
row := NewTermFrequencyRowWithTermVectors(
[]byte{'b', 'e', 'e', 'r'},
0,
[]byte("budweiser"),
3,
3.14,
[]*TermVector{
{
field: 0,
pos: 1,
start: 3,
end: 11,
},
{
field: 0,
pos: 2,
start: 23,
end: 31,
},
{
field: 0,
pos: 3,
start: 43,
end: 51,
},
})
b.ResetTimer()
for i := 0; i < b.N; i++ {
row.Key()
row.Value()
}
}
// BenchmarkTermFrequencyRowDecode measures parsing a term frequency row from
// fixed key and value bytes.
func BenchmarkTermFrequencyRowDecode(b *testing.B) {
	key := []byte{'t', 0, 0, 'b', 'e', 'e', 'r', ByteSeparator, 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r'}
	val := []byte{3, 195, 235, 163, 130, 4, 0, 1, 3, 11, 0, 0, 2, 23, 31, 0, 0, 3, 43, 51, 0}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		if _, err := NewTermFrequencyRowKV(key, val); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkBackIndexRowEncode measures building the key and value bytes of a
// back index row with one terms entry and one store entry.
func BenchmarkBackIndexRowEncode(b *testing.B) {
	field := uint32(1)
	t1 := "term1"
	row := NewBackIndexRow([]byte("beername"),
		[]*BackIndexTermsEntry{
			{
				Field: &field,
				Terms: []string{t1},
			},
		},
		[]*BackIndexStoreEntry{
			{
				Field: &field,
			},
		})

	// Log the encoded value once, before the timer starts. Logging inside the
	// timed loop would add an extra Value call and formatting work to every
	// iteration, distorting the measurement.
	b.Logf("%#v", row.Value())

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		row.Key()
		row.Value()
	}
}
// BenchmarkBackIndexRowDecode measures parsing a back index row from fixed
// key and value bytes.
func BenchmarkBackIndexRowDecode(b *testing.B) {
	key := []byte{0x62, 0x62, 0x65, 0x65, 0x72, 0x6e, 0x61, 0x6d, 0x65}
	val := []byte{0xa, 0x9, 0x8, 0x1, 0x12, 0x5, 0x74, 0x65, 0x72, 0x6d, 0x31, 0x12, 0x2, 0x8, 0x1}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		if _, err := NewBackIndexRowKV(key, val); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkStoredRowEncode measures building the key and value bytes of a
// stored row with no array positions.
func BenchmarkStoredRowEncode(b *testing.B) {
	docID := []byte("budweiser")
	content := []byte("an american beer")
	row := NewStoredRow(docID, 0, []uint64{}, byte('t'), content)

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		row.Key()
		row.Value()
	}
}
// BenchmarkStoredRowDecode measures parsing a stored row from fixed key and
// value bytes.
func BenchmarkStoredRowDecode(b *testing.B) {
	key := []byte{'s', 'b', 'u', 'd', 'w', 'e', 'i', 's', 'e', 'r', ByteSeparator, 0, 0}
	val := []byte{'t', 'a', 'n', ' ', 'a', 'm', 'e', 'r', 'i', 'c', 'a', 'n', ' ', 'b', 'e', 'e', 'r'}

	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		if _, err := NewStoredRowKV(key, val); err != nil {
			b.Fatal(err)
		}
	}
}
// TestVisitBackIndexRow checks that visitBackIndexRow reports every expected
// field/term pair contained in an encoded back index row value.
func TestVisitBackIndexRow(t *testing.T) {
	val := []byte{10, 8, 8, 0, 18, 4, 'b', 'e', 'e', 'r', 10, 8, 8, 1, 18, 4, 'b', 'e', 'a', 't', 18, 2, 8, 3, 18, 2, 8, 4, 18, 2, 8, 5}

	// pairs not yet seen by the visitor; each match removes its entry
	remaining := map[uint32][]byte{
		0: []byte("beer"),
		1: []byte("beat"),
	}

	err := visitBackIndexRow(val, func(field uint32, term []byte) {
		if !reflect.DeepEqual(remaining[field], term) {
			return
		}
		delete(remaining, field)
	})
	if err != nil {
		t.Fatal(err)
	}

	if len(remaining) > 0 {
		t.Errorf("expected visitor to see these but did not %v", remaining)
	}
}
================================================
FILE: index/upsidedown/stats.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"sync/atomic"
"github.com/blevesearch/bleve/v2/util"
"github.com/blevesearch/upsidedown_store_api"
)
// indexStat holds the counters reported for an UpsideDownCouch index.
// The uint64 fields are read with atomic loads in statsMap.
type indexStat struct {
// operation counters
updates, deletes, batches, errors uint64
// accumulated timings, exposed as "analysis_time" and "index_time"
analysisTime, indexTime uint64
// term searcher lifecycle counters
termSearchersStarted uint64
termSearchersFinished uint64
// exposed as "num_plain_text_bytes_indexed"
numPlainTextBytesIndexed uint64
// i is the owning index; its store is consulted for optional KV stats
i *UpsideDownCouch
}
// statsMap returns a snapshot of all counters keyed by their reporting name.
// When the underlying KV store implements store.KVStoreStats, its stats map
// is included under the "kv" key.
func (i *indexStat) statsMap() map[string]interface{} {
	snapshot := map[string]interface{}{
		"updates":                      atomic.LoadUint64(&i.updates),
		"deletes":                      atomic.LoadUint64(&i.deletes),
		"batches":                      atomic.LoadUint64(&i.batches),
		"errors":                       atomic.LoadUint64(&i.errors),
		"analysis_time":                atomic.LoadUint64(&i.analysisTime),
		"index_time":                   atomic.LoadUint64(&i.indexTime),
		"term_searchers_started":       atomic.LoadUint64(&i.termSearchersStarted),
		"term_searchers_finished":      atomic.LoadUint64(&i.termSearchersFinished),
		"num_plain_text_bytes_indexed": atomic.LoadUint64(&i.numPlainTextBytesIndexed),
	}

	if kvStats, ok := i.i.store.(store.KVStoreStats); ok {
		snapshot["kv"] = kvStats.StatsMap()
	}

	return snapshot
}
// MarshalJSON encodes the current stats snapshot (see statsMap) as JSON.
func (i *indexStat) MarshalJSON() ([]byte, error) {
	return util.MarshalJSON(i.statsMap())
}
================================================
FILE: index/upsidedown/store/boltdb/iterator.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package boltdb
import (
"bytes"
bolt "go.etcd.io/bbolt"
)
// Iterator walks a bolt cursor, optionally restricted to a key prefix or to
// a [start, end) key range.
type Iterator struct {
// store and tx are recorded by the Reader that created the iterator
store *Store
tx *bolt.Tx
cursor *bolt.Cursor
// prefix, when non-nil, limits validity to keys carrying this prefix
prefix []byte
// start, when non-nil, is the smallest key Seek will position at
start []byte
// end, when non-nil (and prefix is nil), is the exclusive upper bound
end []byte
// valid reports whether key/val currently satisfy the bounds above
valid bool
// key and val are the pair most recently returned by the cursor
key []byte
val []byte
}
// updateValid recomputes i.valid from the current key: a nil key is never
// valid; otherwise the key must match the prefix when one is set, or else
// sort strictly before end when an end bound is set.
func (i *Iterator) updateValid() {
	switch {
	case i.key == nil:
		i.valid = false
	case i.prefix != nil:
		i.valid = bytes.HasPrefix(i.key, i.prefix)
	case i.end != nil:
		i.valid = bytes.Compare(i.key, i.end) < 0
	default:
		i.valid = true
	}
}
// Seek positions the iterator at the first key >= k, after clamping k to the
// iterator's bounds: a k below start is raised to start, a k sorting before
// the prefix is raised to the prefix, and a k sorting after the prefix (but
// not carrying it) invalidates the iterator without moving the cursor.
func (i *Iterator) Seek(k []byte) {
	target := k
	if i.start != nil && bytes.Compare(target, i.start) < 0 {
		target = i.start
	}

	if i.prefix != nil && !bytes.HasPrefix(target, i.prefix) {
		if bytes.Compare(target, i.prefix) >= 0 {
			// target lies beyond every key with this prefix
			i.valid = false
			return
		}
		target = i.prefix
	}

	i.key, i.val = i.cursor.Seek(target)
	i.updateValid()
}
// Next advances the cursor by one entry and re-evaluates validity against
// the iterator's prefix/end bounds.
func (i *Iterator) Next() {
i.key, i.val = i.cursor.Next()
i.updateValid()
}
// Current returns the stored key, value and validity flag in one call.
// The key and value are returned as-is even when the flag is false.
func (i *Iterator) Current() ([]byte, []byte, bool) {
return i.key, i.val, i.valid
}
// Key returns the key most recently read from the cursor; it does not
// check Valid.
func (i *Iterator) Key() []byte {
return i.key
}
// Value returns the value most recently read from the cursor; it does not
// check Valid.
func (i *Iterator) Value() []byte {
return i.val
}
// Valid reports the validity flag computed by the last Seek or Next.
func (i *Iterator) Valid() bool {
return i.valid
}
// Close is a no-op for this iterator and always returns nil.
func (i *Iterator) Close() error {
return nil
}
================================================
FILE: index/upsidedown/store/boltdb/reader.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package boltdb
import (
store "github.com/blevesearch/upsidedown_store_api"
bolt "go.etcd.io/bbolt"
)
// Reader reads from a single bolt bucket inside one transaction; Close rolls
// that transaction back.
type Reader struct {
store *Store
tx *bolt.Tx
bucket *bolt.Bucket
}
// Get looks up key in the bucket. It returns nil when the bucket yields no
// value; otherwise it returns a fresh copy of the value rather than the
// slice handed out by bolt. The error result is always nil.
func (r *Reader) Get(key []byte) ([]byte, error) {
	found := r.bucket.Get(key)
	if found == nil {
		return nil, nil
	}

	owned := make([]byte, len(found))
	copy(owned, found)
	return owned, nil
}
// MultiGet delegates to the store package's MultiGet helper, passing this
// reader and the requested keys.
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
return store.MultiGet(r, keys)
}
// PrefixIterator returns an iterator restricted to keys carrying prefix,
// already positioned by seeking to the prefix itself.
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator {
	it := &Iterator{
		store:  r.store,
		tx:     r.tx,
		cursor: r.bucket.Cursor(),
		prefix: prefix,
	}
	it.Seek(prefix)
	return it
}
// RangeIterator returns an iterator bounded by start and end (end is checked
// as an exclusive upper bound), already positioned by seeking to start.
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
	it := &Iterator{
		store:  r.store,
		tx:     r.tx,
		cursor: r.bucket.Cursor(),
		start:  start,
		end:    end,
	}
	it.Seek(start)
	return it
}
// Close ends the reader by rolling back its transaction and returns the
// rollback result.
func (r *Reader) Close() error {
return r.tx.Rollback()
}
================================================
FILE: index/upsidedown/store/boltdb/stats.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package boltdb
import (
"github.com/blevesearch/bleve/v2/util"
)
// stats exposes the statistics of a Store's bolt database as JSON.
type stats struct {
// s is the store whose database stats are reported
s *Store
}
// MarshalJSON encodes the current stats of the underlying bolt database.
func (s *stats) MarshalJSON() ([]byte, error) {
	return util.MarshalJSON(s.s.db.Stats())
}
================================================
FILE: index/upsidedown/store/boltdb/store.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
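// Package boltdb implements a store.KVStore on top of BoltDB. It supports the
// following options:
//
// "path" (string): required, the path of the BoltDB file to open.
//
// "bucket" (string): the name of BoltDB bucket to use, defaults to "bleve".
//
// "nosync" (bool): if true, set boltdb.DB.NoSync to true. It speeds up index
// operations in exchange for losing integrity guarantees if indexing aborts
// without closing the index. Use it when rebuilding indexes from zero.
//
// "fillPercent" (float64): the fill percent applied to the bucket when
// executing batches, defaults to bolt.DefaultFillPercent.
//
// "read_only" (bool): if true, open the database in read-only mode; the
// bucket is not created in this mode.
//
// "initialMmapSize" (int or float64): the initial mmap size passed to BoltDB.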
package boltdb
import (
"bytes"
"encoding/json"
"fmt"
"os"
"github.com/blevesearch/bleve/v2/registry"
store "github.com/blevesearch/upsidedown_store_api"
bolt "go.etcd.io/bbolt"
)
const (
// Name is the identifier under which this KV store is registered (see init).
Name = "boltdb"
// defaultCompactBatchSize is the batch size used by Compact.
defaultCompactBatchSize = 100
)
// Store is a KV store backed by a single bucket of a bolt database.
type Store struct {
// path is the database file path taken from the "path" option
path string
// bucket is the bucket name all reads and writes go to
bucket string
db *bolt.DB
// noSync mirrors the "nosync" option applied to db.NoSync
noSync bool
// fillPercent is applied to the bucket when a batch is executed
fillPercent float64
// mo is the merge operator used for batches and merges
mo store.MergeOperator
}
// New opens (creating if necessary) the bolt database named by the "path"
// option and returns a Store over it.
//
// Recognized config keys: "path" (string, required and non-empty), "bucket"
// (string, default "bleve"), "nosync" (bool), "fillPercent" (float64, default
// bolt.DefaultFillPercent), "read_only" (bool) and "initialMmapSize" (int or
// float64). Unless the database is opened read-only, the bucket is created
// if it does not exist yet.
//
// It returns an error when the path is missing or empty, when the database
// cannot be opened, or when the bucket cannot be created. In the last case
// the database is closed again before returning so its file lock is released.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	path, ok := config["path"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify path")
	}
	if path == "" {
		return nil, os.ErrInvalid
	}

	bucket, ok := config["bucket"].(string)
	if !ok {
		bucket = "bleve"
	}

	noSync, _ := config["nosync"].(bool)

	fillPercent, ok := config["fillPercent"].(float64)
	if !ok {
		fillPercent = bolt.DefaultFillPercent
	}

	bo := &bolt.Options{}
	ro, ok := config["read_only"].(bool)
	if ok {
		bo.ReadOnly = ro
	}

	// the value may arrive as an int (set from Go code) or as a float64
	// (decoded from JSON)
	if initialMmapSize, ok := config["initialMmapSize"].(int); ok {
		bo.InitialMmapSize = initialMmapSize
	} else if initialMmapSize, ok := config["initialMmapSize"].(float64); ok {
		bo.InitialMmapSize = int(initialMmapSize)
	}

	db, err := bolt.Open(path, 0600, bo)
	if err != nil {
		return nil, err
	}
	db.NoSync = noSync

	if !bo.ReadOnly {
		err = db.Update(func(tx *bolt.Tx) error {
			_, err := tx.CreateBucketIfNotExists([]byte(bucket))
			return err
		})
		if err != nil {
			// do not leak the open database; the caller needs the
			// bucket-creation error, not the result of Close
			_ = db.Close()
			return nil, err
		}
	}

	rv := Store{
		path:        path,
		bucket:      bucket,
		db:          db,
		mo:          mo,
		noSync:      noSync,
		fillPercent: fillPercent,
	}
	return &rv, nil
}
// Close closes the underlying bolt database and returns its result.
func (bs *Store) Close() error {
return bs.db.Close()
}
// Reader begins a read-only transaction and returns a Reader bound to the
// store's bucket within it. The caller must Close the reader to end the
// transaction.
//
// It returns an error when the transaction cannot be started or when the
// bucket does not exist (possible when the database was opened read-only,
// since the bucket is not created in that mode). Failing here avoids handing
// out a Reader whose nil bucket would be dereferenced on first use.
func (bs *Store) Reader() (store.KVReader, error) {
	tx, err := bs.db.Begin(false)
	if err != nil {
		return nil, err
	}

	bucket := tx.Bucket([]byte(bs.bucket))
	if bucket == nil {
		// release the transaction; the missing bucket is the error to report
		_ = tx.Rollback()
		return nil, fmt.Errorf("bucket %q does not exist", bs.bucket)
	}

	return &Reader{
		store:  bs,
		tx:     tx,
		bucket: bucket,
	}, nil
}
// Writer returns a Writer bound to this store. No transaction is started
// here and the error result is always nil.
func (bs *Store) Writer() (store.KVWriter, error) {
return &Writer{
store: bs,
}, nil
}
// Stats returns a json.Marshaler that reports the bolt database statistics
// of this store when marshaled.
func (bs *Store) Stats() json.Marshaler {
return &stats{
s: bs,
}
}
// CompactWithBatchSize deletes entries whose key starts with "d" (dictionary
// rows) and whose value is the single byte 0, i.e. a count of zero. Deletions
// are issued in bolt batches that stop after batchSize removals, and batches
// are repeated until one removes nothing. Removing these entries is a
// workaround for github issue #374.
func (bs *Store) CompactWithBatchSize(batchSize int) error {
	dictPrefix := []byte("d")
	zeroCount := []byte{0}

	for {
		removed := 0
		err := bs.db.Batch(func(tx *bolt.Tx) error {
			cur := tx.Bucket([]byte(bs.bucket)).Cursor()
			for k, v := cur.Seek(dictPrefix); bytes.HasPrefix(k, dictPrefix); k, v = cur.Next() {
				if !bytes.Equal(v, zeroCount) {
					continue
				}
				removed++
				if err := cur.Delete(); err != nil {
					return err
				}
				if removed == batchSize {
					break
				}
			}
			return nil
		})
		if err != nil {
			return err
		}
		if removed == 0 {
			// a full pass found nothing left to delete
			return nil
		}
	}
}
// Compact calls CompactWithBatchSize with the default batch size
// (defaultCompactBatchSize, currently 100). This is a workaround
// for github issue #374.
func (bs *Store) Compact() error {
return bs.CompactWithBatchSize(defaultCompactBatchSize)
}
// init registers this store's constructor with the registry under Name and
// panics if the registration fails.
func init() {
	if err := registry.RegisterKVStore(Name, New); err != nil {
		panic(err)
	}
}
================================================
FILE: index/upsidedown/store/boltdb/store_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !darwin || !arm64
package boltdb
import (
"os"
"testing"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/upsidedown_store_api/test"
bolt "go.etcd.io/bbolt"
)
// open creates a store at the fixed path "test" using the given merge
// operator, failing the test immediately if the store cannot be created.
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
	config := map[string]interface{}{"path": "test"}
	kv, err := New(mo, config)
	if err != nil {
		t.Fatal(err)
	}
	return kv
}
// cleanup closes the store and removes the "test" path from disk, failing
// the test if either step returns an error.
func cleanup(t *testing.T, s store.KVStore) {
	if err := s.Close(); err != nil {
		t.Fatal(err)
	}
	if err := os.RemoveAll("test"); err != nil {
		t.Fatal(err)
	}
}
// TestBoltDBKVCrud runs the shared KV CRUD suite against a fresh store.
func TestBoltDBKVCrud(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestKVCrud(t, kv)
}
// TestBoltDBReaderIsolation runs the shared reader isolation suite against a
// fresh store.
func TestBoltDBReaderIsolation(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestReaderIsolation(t, kv)
}
// TestBoltDBReaderOwnsGetBytes runs the shared "reader owns Get bytes" suite
// against a fresh store.
func TestBoltDBReaderOwnsGetBytes(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestReaderOwnsGetBytes(t, kv)
}
// TestBoltDBWriterOwnsBytes runs the shared "writer owns bytes" suite against
// a fresh store.
func TestBoltDBWriterOwnsBytes(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestWriterOwnsBytes(t, kv)
}
// TestBoltDBPrefixIterator runs the shared prefix iterator suite against a
// fresh store.
func TestBoltDBPrefixIterator(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestPrefixIterator(t, kv)
}
// TestBoltDBPrefixIteratorSeek runs the shared prefix iterator seek suite
// against a fresh store.
func TestBoltDBPrefixIteratorSeek(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestPrefixIteratorSeek(t, kv)
}
// TestBoltDBRangeIterator runs the shared range iterator suite against a
// fresh store.
func TestBoltDBRangeIterator(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestRangeIterator(t, kv)
}
// TestBoltDBRangeIteratorSeek runs the shared range iterator seek suite
// against a fresh store.
func TestBoltDBRangeIteratorSeek(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestRangeIteratorSeek(t, kv)
}
// TestBoltDBMerge runs the shared merge suite against a fresh store that uses
// the test merge counter as its merge operator.
func TestBoltDBMerge(t *testing.T) {
	kv := open(t, &test.TestMergeCounter{})
	defer cleanup(t, kv)

	test.CommonTestMerge(t, kv)
}
// TestBoltDBConfig verifies that New copies the supplied options (or their
// defaults) into the resulting Store.
func TestBoltDBConfig(t *testing.T) {
	var cases = []struct {
		in          map[string]interface{}
		path        string
		bucket      string
		noSync      bool
		fillPercent float64
	}{
		{
			map[string]interface{}{"path": "test", "bucket": "mybucket", "nosync": true, "fillPercent": 0.75},
			"test",
			"mybucket",
			true,
			0.75,
		},
		{
			map[string]interface{}{"path": "test"},
			"test",
			"bleve",
			false,
			bolt.DefaultFillPercent,
		},
	}

	// the loop variable is named tc so it does not shadow the imported test
	// package
	for _, tc := range cases {
		// each case runs in its own function so the deferred cleanup closes
		// and removes the store even when an assertion fails; otherwise the
		// database stays open and locked for whatever opens "test" next
		func() {
			kv, err := New(nil, tc.in)
			if err != nil {
				t.Fatal(err)
			}
			defer cleanup(t, kv)

			bs, ok := kv.(*Store)
			if !ok {
				t.Fatal("failed type assertion to *boltdb.Store")
			}
			if bs.path != tc.path {
				t.Fatalf("path: expected %q, got %q", tc.path, bs.path)
			}
			if bs.bucket != tc.bucket {
				t.Fatalf("bucket: expected %q, got %q", tc.bucket, bs.bucket)
			}
			if bs.noSync != tc.noSync {
				t.Fatalf("noSync: expected %t, got %t", tc.noSync, bs.noSync)
			}
			if bs.fillPercent != tc.fillPercent {
				t.Fatalf("fillPercent: expected %f, got %f", tc.fillPercent, bs.fillPercent)
			}
		}()
	}
}
================================================
FILE: index/upsidedown/store/boltdb/writer.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package boltdb
import (
"fmt"
store "github.com/blevesearch/upsidedown_store_api"
)
// Writer applies batches to the bolt database of its Store.
type Writer struct {
store *Store
}
// NewBatch returns an emulated batch configured with the store's merge
// operator.
func (w *Writer) NewBatch() store.KVBatch {
return store.NewEmulatedBatch(w.store.mo)
}
// NewBatchEx returns a byte buffer of options.TotalBytes length together with
// a new batch (see NewBatch). The error result is always nil.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	buf := make([]byte, options.TotalBytes)
	return buf, w.NewBatch(), nil
}
// ExecuteBatch applies an emulated batch inside one read-write transaction.
// Merges are resolved first via the store's merge operator and written as
// puts; then the batch operations run in order, where a nil value means
// delete. The transaction is committed when every step succeeds and rolled
// back otherwise.
//
// It returns an error when batch is not a *store.EmulatedBatch, when the
// transaction cannot be started, when a merge, put or delete fails, or when
// the final commit fails.
func (w *Writer) ExecuteBatch(batch store.KVBatch) (err error) {
	eb, isEmulated := batch.(*store.EmulatedBatch)
	if !isEmulated {
		return fmt.Errorf("wrong type of batch")
	}

	tx, err := w.store.db.Begin(true)
	if err != nil {
		return err
	}

	// From here on the transaction must end in exactly one of Commit or
	// Rollback, chosen by the value of the named result err at return time.
	defer func() {
		if err != nil {
			// the caller should see the error that caused the abort, not the
			// outcome of the rollback itself
			_ = tx.Rollback()
			return
		}
		// a commit failure becomes the function's result
		err = tx.Commit()
	}()

	bucket := tx.Bucket([]byte(w.store.bucket))
	bucket.FillPercent = w.store.fillPercent

	for k, mergeOps := range eb.Merger.Merges {
		key := []byte(k)
		merged, mergeOK := w.store.mo.FullMerge(key, bucket.Get(key), mergeOps)
		if !mergeOK {
			err = fmt.Errorf("merge operator returned failure")
			return err
		}
		if err = bucket.Put(key, merged); err != nil {
			return err
		}
	}

	for _, op := range eb.Ops {
		if op.V == nil {
			err = bucket.Delete(op.K)
		} else {
			err = bucket.Put(op.K, op.V)
		}
		if err != nil {
			return err
		}
	}

	return nil
}
// Close is a no-op for this writer and always returns nil.
func (w *Writer) Close() error {
return nil
}
================================================
FILE: index/upsidedown/store/goleveldb/batch.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import (
"github.com/blevesearch/goleveldb/leveldb"
store "github.com/blevesearch/upsidedown_store_api"
)
// Batch collects writes for a goleveldb store: sets and deletes go to the
// leveldb batch, merges go to the emulated merge.
type Batch struct {
store *Store
// merge records Merge calls
merge *store.EmulatedMerge
// batch records Set and Delete calls
batch *leveldb.Batch
}
// Set records a put of key/val in the underlying leveldb batch.
func (b *Batch) Set(key, val []byte) {
b.batch.Put(key, val)
}
// Delete records a delete of key in the underlying leveldb batch.
func (b *Batch) Delete(key []byte) {
b.batch.Delete(key)
}
// Merge records a merge of val into key in the emulated merge.
func (b *Batch) Merge(key, val []byte) {
b.merge.Merge(key, val)
}
// Reset clears the leveldb batch and replaces the emulated merge with a new
// one built from the store's merge operator.
func (b *Batch) Reset() {
b.batch.Reset()
b.merge = store.NewEmulatedMerge(b.store.mo)
}
// Close resets the leveldb batch and drops the references to both the batch
// and the emulated merge. It always returns nil.
func (b *Batch) Close() error {
	b.batch.Reset()
	b.batch, b.merge = nil, nil
	return nil
}
================================================
FILE: index/upsidedown/store/goleveldb/config.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import (
"github.com/blevesearch/goleveldb/leveldb/filter"
"github.com/blevesearch/goleveldb/leveldb/opt"
)
// applyConfig copies recognized settings from config onto o and returns o.
// Boolean settings must be stored as bool and numeric settings as float64;
// entries that are missing or have another type are left untouched on o.
// The error result is always nil.
func applyConfig(o *opt.Options, config map[string]interface{}) (*opt.Options, error) {
	if readOnly, ok := config["read_only"].(bool); ok {
		o.ReadOnly = readOnly
	}

	// create_if_missing is the inverse of leveldb's ErrorIfMissing
	if createIfMissing, ok := config["create_if_missing"].(bool); ok {
		o.ErrorIfMissing = !createIfMissing
	}

	if errorIfExists, ok := config["error_if_exists"].(bool); ok {
		o.ErrorIfExist = errorIfExists
	}

	if writeBufferSize, ok := config["write_buffer_size"].(float64); ok {
		o.WriteBuffer = int(writeBufferSize)
	}

	if blockSize, ok := config["block_size"].(float64); ok {
		o.BlockSize = int(blockSize)
	}

	if restartInterval, ok := config["block_restart_interval"].(float64); ok {
		o.BlockRestartInterval = int(restartInterval)
	}

	if cacheCapacity, ok := config["lru_cache_capacity"].(float64); ok {
		o.BlockCacheCapacity = int(cacheCapacity)
	}

	if bitsPerKey, ok := config["bloom_filter_bits_per_key"].(float64); ok {
		o.Filter = filter.NewBloomFilter(int(bitsPerKey))
	}

	return o, nil
}
================================================
FILE: index/upsidedown/store/goleveldb/iterator.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import "github.com/blevesearch/goleveldb/leveldb/iterator"
// Iterator adapts a goleveldb iterator to the KV iterator methods defined
// below.
type Iterator struct {
store *Store
iterator iterator.Iterator
}
// Seek forwards to the underlying iterator's Seek with the given key.
func (ldi *Iterator) Seek(key []byte) {
ldi.iterator.Seek(key)
}
// Next forwards to the underlying iterator's Next.
func (ldi *Iterator) Next() {
ldi.iterator.Next()
}
// Current returns the current key and value with true when the iterator is
// valid, and nil, nil, false otherwise.
func (ldi *Iterator) Current() ([]byte, []byte, bool) {
	if !ldi.Valid() {
		return nil, nil, false
	}
	return ldi.Key(), ldi.Value(), true
}
// Key returns the underlying iterator's current key.
func (ldi *Iterator) Key() []byte {
return ldi.iterator.Key()
}
// Value returns the underlying iterator's current value.
func (ldi *Iterator) Value() []byte {
return ldi.iterator.Value()
}
// Valid reports whether the underlying iterator is currently valid.
func (ldi *Iterator) Valid() bool {
return ldi.iterator.Valid()
}
// Close releases the underlying iterator and always returns nil.
func (ldi *Iterator) Close() error {
ldi.iterator.Release()
return nil
}
================================================
FILE: index/upsidedown/store/goleveldb/reader.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import (
"github.com/blevesearch/goleveldb/leveldb"
"github.com/blevesearch/goleveldb/leveldb/util"
store "github.com/blevesearch/upsidedown_store_api"
)
// Reader reads from a goleveldb snapshot; Close releases the snapshot.
type Reader struct {
store *Store
snapshot *leveldb.Snapshot
}
// Get reads key from the snapshot using the store's default read options.
// A missing key (leveldb.ErrNotFound) is reported as a nil value with a nil
// error; any other result is passed through unchanged.
func (r *Reader) Get(key []byte) ([]byte, error) {
	val, err := r.snapshot.Get(key, r.store.defaultReadOptions)
	if err != leveldb.ErrNotFound {
		return val, err
	}
	return nil, nil
}
// MultiGet delegates to the store package's MultiGet helper, passing this
// reader and the requested keys.
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
return store.MultiGet(r, keys)
}
// PrefixIterator returns an iterator over the snapshot limited to the key
// range computed by util.BytesPrefix(prefix), positioned at its first entry.
func (r *Reader) PrefixIterator(prefix []byte) store.KVIterator {
	it := r.snapshot.NewIterator(util.BytesPrefix(prefix), r.store.defaultReadOptions)
	it.First()
	return &Iterator{
		store:    r.store,
		iterator: it,
	}
}
// RangeIterator returns an iterator over the snapshot limited to the range
// with Start=start and Limit=end, positioned at its first entry.
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
	keyRange := &util.Range{Start: start, Limit: end}
	it := r.snapshot.NewIterator(keyRange, r.store.defaultReadOptions)
	it.First()
	return &Iterator{
		store:    r.store,
		iterator: it,
	}
}
// Close releases the reader's snapshot and always returns nil.
func (r *Reader) Close() error {
r.snapshot.Release()
return nil
}
================================================
FILE: index/upsidedown/store/goleveldb/store.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import (
"bytes"
"fmt"
"os"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/goleveldb/leveldb"
"github.com/blevesearch/goleveldb/leveldb/opt"
"github.com/blevesearch/goleveldb/leveldb/util"
store "github.com/blevesearch/upsidedown_store_api"
)
const (
// Name is the identifier under which this KV store is registered (see init).
Name = "goleveldb"
// defaultCompactBatchSize is the batch size used by Compact.
defaultCompactBatchSize = 250
)
// Store is a KV store backed by a goleveldb database.
type Store struct {
// path is the database directory taken from the "path" option
path string
// opts are the leveldb options the database was opened with
opts *opt.Options
db *leveldb.DB
// mo is the merge operator used for batches and merges
mo store.MergeOperator
// defaultWriteOptions has Sync enabled by New
defaultWriteOptions *opt.WriteOptions
defaultReadOptions *opt.ReadOptions
}
// New opens the goleveldb database at the "path" option, applying the
// remaining options through applyConfig, and returns a Store over it. Writes
// issued with the store's default write options are synced.
//
// It returns an error when the path is missing or empty, or when the options
// cannot be applied or the database cannot be opened.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	path, isString := config["path"].(string)
	switch {
	case !isString:
		return nil, fmt.Errorf("must specify path")
	case path == "":
		return nil, os.ErrInvalid
	}

	opts, err := applyConfig(&opt.Options{}, config)
	if err != nil {
		return nil, err
	}

	db, err := leveldb.OpenFile(path, opts)
	if err != nil {
		return nil, err
	}

	return &Store{
		path:                path,
		opts:                opts,
		db:                  db,
		mo:                  mo,
		defaultReadOptions:  &opt.ReadOptions{},
		defaultWriteOptions: &opt.WriteOptions{Sync: true},
	}, nil
}
// Close closes the underlying goleveldb database and returns its result.
func (ldbs *Store) Close() error {
return ldbs.db.Close()
}
// Reader takes a snapshot of the database and returns a Reader over it. The
// caller must Close the reader to release the snapshot.
//
// It returns the error from GetSnapshot when no snapshot can be obtained,
// rather than returning a Reader without a usable snapshot.
func (ldbs *Store) Reader() (store.KVReader, error) {
	snapshot, err := ldbs.db.GetSnapshot()
	if err != nil {
		return nil, err
	}
	return &Reader{
		store:    ldbs,
		snapshot: snapshot,
	}, nil
}
// Writer returns a Writer bound to this store. The error result is always
// nil.
func (ldbs *Store) Writer() (store.KVWriter, error) {
return &Writer{
store: ldbs,
}, nil
}
// CompactWithBatchSize removes DictionaryTerm entries with a count of zero (in batchSize batches), then
// compacts the underlying goleveldb store. Removing entries is a workaround for github issue #374.
//
// Each batch runs in its own transaction: keys starting with "d" whose value
// is the single byte 0 are collected until the batch holds batchSize deletes,
// then written and committed. The loop ends when a pass collects nothing.
// Every error path discards the open transaction before returning, including
// a failed Commit, which otherwise leaves the transaction open.
func (ldbs *Store) CompactWithBatchSize(batchSize int) error {
	// workaround for github issue #374 - remove DictionaryTerm keys with count=0
	batch := &leveldb.Batch{}
	for {
		t, err := ldbs.db.OpenTransaction()
		if err != nil {
			return err
		}

		iter := t.NewIterator(util.BytesPrefix([]byte("d")), ldbs.defaultReadOptions)
		for iter.Next() {
			if bytes.Equal(iter.Value(), []byte{0}) {
				// copy the key; the iterator's slice is not kept beyond this step
				k := append([]byte{}, iter.Key()...)
				batch.Delete(k)
			}
			if batch.Len() == batchSize {
				break
			}
		}
		iter.Release()
		if err := iter.Error(); err != nil {
			t.Discard()
			return err
		}

		if batch.Len() == 0 {
			// nothing left to delete
			t.Discard()
			break
		}

		if err := t.Write(batch, ldbs.defaultWriteOptions); err != nil {
			t.Discard()
			return err
		}
		if err := t.Commit(); err != nil {
			// a failed commit leaves the transaction open; discard it so it
			// is not left dangling
			t.Discard()
			return err
		}
		batch.Reset()
	}

	return ldbs.db.CompactRange(util.Range{Start: nil, Limit: nil})
}
// Compact compacts the underlying goleveldb store by calling
// CompactWithBatchSize with defaultCompactBatchSize. The current
// implementation includes a workaround for github issue #374 (see
// CompactWithBatchSize).
func (ldbs *Store) Compact() error {
return ldbs.CompactWithBatchSize(defaultCompactBatchSize)
}
// init registers this store's constructor with the registry under Name and
// panics if the registration fails.
func init() {
	if err := registry.RegisterKVStore(Name, New); err != nil {
		panic(err)
	}
}
================================================
FILE: index/upsidedown/store/goleveldb/store_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import (
"os"
"testing"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/upsidedown_store_api/test"
)
// open builds a store via New using the relative path "test" with
// create_if_missing enabled, failing the test immediately if New errors.
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
	config := map[string]interface{}{
		"path":              "test",
		"create_if_missing": true,
	}
	kvs, err := New(mo, config)
	if err != nil {
		t.Fatal(err)
	}
	return kvs
}
// cleanup closes the store and then removes the "test" directory tree,
// failing the test on the first error from either step.
func cleanup(t *testing.T, s store.KVStore) {
	if err := s.Close(); err != nil {
		t.Fatal(err)
	}
	if err := os.RemoveAll("test"); err != nil {
		t.Fatal(err)
	}
}
// TestGoLevelDBKVCrud runs the shared CommonTestKVCrud suite against a store
// obtained from open (no merge operator); cleanup runs when the test returns.
func TestGoLevelDBKVCrud(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestKVCrud(t, s)
}
// TestGoLevelDBReaderIsolation runs the shared CommonTestReaderIsolation suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBReaderIsolation(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderIsolation(t, s)
}
// TestGoLevelDBReaderOwnsGetBytes runs the shared CommonTestReaderOwnsGetBytes
// suite against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBReaderOwnsGetBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderOwnsGetBytes(t, s)
}
// TestGoLevelDBWriterOwnsBytes runs the shared CommonTestWriterOwnsBytes suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBWriterOwnsBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestWriterOwnsBytes(t, s)
}
// TestGoLevelDBPrefixIterator runs the shared CommonTestPrefixIterator suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBPrefixIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIterator(t, s)
}
// TestGoLevelDBPrefixIteratorSeek runs the shared CommonTestPrefixIteratorSeek
// suite against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBPrefixIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIteratorSeek(t, s)
}
// TestGoLevelDBRangeIterator runs the shared CommonTestRangeIterator suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBRangeIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIterator(t, s)
}
// TestGoLevelDBRangeIteratorSeek runs the shared CommonTestRangeIteratorSeek
// suite against a store obtained from open; cleanup runs when the test returns.
func TestGoLevelDBRangeIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIteratorSeek(t, s)
}
// TestGoLevelDBMerge runs the shared CommonTestMerge suite against a store
// opened with a test.TestMergeCounter merge operator; cleanup runs when the
// test returns.
func TestGoLevelDBMerge(t *testing.T) {
s := open(t, &test.TestMergeCounter{})
defer cleanup(t, s)
test.CommonTestMerge(t, s)
}
================================================
FILE: index/upsidedown/store/goleveldb/writer.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package goleveldb
import (
"fmt"
"github.com/blevesearch/goleveldb/leveldb"
store "github.com/blevesearch/upsidedown_store_api"
)
// Writer creates and executes batches against a goleveldb-backed Store.
type Writer struct {
// store supplies the database handle, read/write options and merge
// operator used by the writer's methods.
store *Store
}
// NewBatch returns a fresh Batch bound to the writer's store, carrying an
// emulated merge collector built from the store's merge operator and an empty
// leveldb batch.
func (w *Writer) NewBatch() store.KVBatch {
	return &Batch{
		store: w.store,
		merge: store.NewEmulatedMerge(w.store.mo),
		batch: new(leveldb.Batch),
	}
}
// NewBatchEx returns a newly allocated byte slice of length options.TotalBytes
// together with a batch from NewBatch. The returned error is always nil.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
return make([]byte, options.TotalBytes), w.NewBatch(), nil
}
// ExecuteBatch applies b to the underlying database. b must be a *Batch;
// any other type yields an error. Every key with pending merge operands is
// first read from the database, combined through the store's merge operator,
// and the result is added to the batch as a Put. The batch is then written
// with the store's default write options.
func (w *Writer) ExecuteBatch(b store.KVBatch) error {
	ldbBatch, isBatch := b.(*Batch)
	if !isBatch {
		return fmt.Errorf("wrong type of batch")
	}

	// first process merges
	for key, operands := range ldbBatch.merge.Merges {
		keyBytes := []byte(key)
		current, getErr := w.store.db.Get(keyBytes, w.store.defaultReadOptions)
		// A missing key is not an error: the merge starts from a nil value.
		if getErr != nil && getErr != leveldb.ErrNotFound {
			return getErr
		}
		merged, mergedOk := w.store.mo.FullMerge(keyBytes, current, operands)
		if !mergedOk {
			return fmt.Errorf("merge operator returned failure")
		}
		// add the final merge to this batch
		ldbBatch.batch.Put(keyBytes, merged)
	}

	// now execute the batch
	return w.store.db.Write(ldbBatch.batch, w.store.defaultWriteOptions)
}
// Close is a no-op for this writer and always returns nil.
func (w *Writer) Close() error {
return nil
}
================================================
FILE: index/upsidedown/store/gtreap/iterator.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package gtreap provides an in-memory implementation of the
// KVStore interfaces using the gtreap balanced-binary treap,
// copy-on-write data structure.
package gtreap
import (
"bytes"
"sync"
"github.com/blevesearch/gtreap"
)
// Iterator walks the items of a treap in ascending key order. Items are
// produced by a background goroutine (started in restart) and handed over
// through nextCh.
type Iterator struct {
// t is the treap being visited.
t *gtreap.Treap
// m is held by restart, Current and Close while they touch the fields below.
m sync.Mutex
// cancelCh is closed to tell the current producer goroutine to stop.
cancelCh chan struct{}
// nextCh delivers items from the producer goroutine; it is closed when the
// visit finishes.
nextCh chan *Item
// curr and currOk hold the result of the most recent receive from nextCh.
curr *Item
currOk bool
// prefix, when non-nil, restricts valid items to keys with this prefix.
prefix []byte
// start, when non-nil, is the lowest key Seek will position at.
start []byte
// end, when non-nil, is the exclusive upper bound for valid keys.
end []byte
}
// Seek repositions the iterator at the first item whose key is >= k, after
// clamping k to the iterator's bounds: a key below start is raised to start,
// and for a prefix iterator a key outside the prefix is moved either to the
// prefix itself (when k sorts before it) or to the smallest key that sorts
// after every key carrying the prefix.
func (w *Iterator) Seek(k []byte) {
	if w.start != nil && bytes.Compare(k, w.start) < 0 {
		k = w.start
	}
	if w.prefix != nil && !bytes.HasPrefix(k, w.prefix) {
		if bytes.Compare(k, w.prefix) >= 0 {
			// Build the successor of the prefix: bump the right-most byte
			// that is not 0xff and drop everything after it. It stays nil
			// when every prefix byte is 0xff.
			var successor []byte
			for idx := len(w.prefix) - 1; idx >= 0; idx-- {
				b := w.prefix[idx]
				if b < 0xff {
					successor = make([]byte, idx+1)
					copy(successor, w.prefix)
					successor[idx] = b + 1
					break
				}
			}
			k = successor
		} else {
			k = w.prefix
		}
	}
	w.restart(&Item{k: k})
}
// restart cancels any producer goroutine from a previous positioning, starts a
// new one that visits the treap in ascending order beginning at start, and
// advances to the first item. It returns the iterator itself.
func (w *Iterator) restart(start *Item) *Iterator {
// Fresh channels per restart, so a stale producer can never feed this round.
cancelCh := make(chan struct{})
nextCh := make(chan *Item, 1)
w.m.Lock()
// Closing the old cancel channel releases the previous producer, if any.
if w.cancelCh != nil {
close(w.cancelCh)
}
w.cancelCh = cancelCh
w.nextCh = nextCh
w.curr = nil
w.currOk = false
w.m.Unlock()
go func() {
if start != nil {
w.t.VisitAscend(start, func(itm gtreap.Item) bool {
// Either hand the item to the consumer or stop once cancelled.
select {
case <-cancelCh:
return false
case nextCh <- itm.(*Item):
return true
}
})
}
// Closing nextCh makes the consumer's receive report ok == false.
close(nextCh)
}()
// Load the first item so Current is meaningful right after restart.
w.Next()
return w
}
// Next advances the iterator to the next item delivered by the producer
// goroutine. Once the producer has finished (its channel is closed) the
// iterator becomes invalid. Calling Next on an iterator that has no active
// channel - for example after Close - also leaves it invalid instead of
// blocking forever on a nil channel.
func (w *Iterator) Next() {
	w.m.Lock()
	nextCh := w.nextCh
	w.m.Unlock()

	var (
		item *Item
		ok   bool
	)
	if nextCh != nil {
		// Receive without holding the mutex so Close/restart are never
		// blocked behind a slow producer.
		item, ok = <-nextCh
	}

	// Publish under the mutex, matching the locking done by Current.
	w.m.Lock()
	w.curr, w.currOk = item, ok
	w.m.Unlock()
}
// Current returns the key and value of the item the iterator is positioned
// on, plus true. It returns (nil, nil, false) when there is no current item,
// when the item's key lacks the iterator's prefix, or when the key is at or
// beyond the iterator's end bound.
func (w *Iterator) Current() ([]byte, []byte, bool) {
	w.m.Lock()
	defer w.m.Unlock()

	switch {
	case !w.currOk, w.curr == nil:
		return nil, nil, false
	case w.prefix != nil && !bytes.HasPrefix(w.curr.k, w.prefix):
		return nil, nil, false
	case w.end != nil && bytes.Compare(w.curr.k, w.end) >= 0:
		return nil, nil, false
	}
	// currOk is known to be true on this path.
	return w.curr.k, w.curr.v, true
}
// Key returns the current item's key, or nil when the iterator is not valid.
func (w *Iterator) Key() []byte {
	if key, _, valid := w.Current(); valid {
		return key
	}
	return nil
}
// Value returns the current item's value, or nil when the iterator is not
// valid.
func (w *Iterator) Value() []byte {
	if _, val, valid := w.Current(); valid {
		return val
	}
	return nil
}
// Valid reports whether Current would return an item, i.e. the third result
// of Current.
func (w *Iterator) Valid() bool {
_, _, ok := w.Current()
return ok
}
// Close signals the producer goroutine to stop (by closing the cancel
// channel, if one exists) and clears the iterator's channels and current
// item. It always returns nil.
func (w *Iterator) Close() error {
	w.m.Lock()
	defer w.m.Unlock()

	if cancel := w.cancelCh; cancel != nil {
		close(cancel)
	}
	w.cancelCh, w.nextCh = nil, nil
	w.curr, w.currOk = nil, false
	return nil
}
================================================
FILE: index/upsidedown/store/gtreap/reader.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package gtreap provides an in-memory implementation of the
// KVStore interfaces using the gtreap balanced-binary treap,
// copy-on-write data structure.
package gtreap
import (
"github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/gtreap"
)
// Reader serves reads and iterators from a single treap value.
type Reader struct {
// t is the treap this reader looks keys up in and iterates over.
t *gtreap.Treap
}
// Get looks k up in the reader's treap. When the key is present it returns a
// fresh copy of the stored value; otherwise it returns nil. The error is
// always nil.
func (w *Reader) Get(k []byte) (v []byte, err error) {
	found := w.t.Get(&Item{k: k})
	if found == nil {
		return nil, nil
	}
	stored := found.(*Item).v
	// Copy so the caller owns the returned bytes.
	out := make([]byte, len(stored))
	copy(out, stored)
	return out, nil
}
// MultiGet delegates to store.MultiGet, passing this reader and the keys, and
// returns its result unchanged.
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
return store.MultiGet(r, keys)
}
// PrefixIterator returns an iterator over the reader's treap restricted to
// keys with prefix k, already positioned at the first item whose key is >= k.
func (w *Reader) PrefixIterator(k []byte) store.KVIterator {
	it := &Iterator{
		t:      w.t,
		prefix: k,
	}
	it.restart(&Item{k: k})
	return it
}
// RangeIterator returns an iterator over the reader's treap bounded by start
// and end (end is exclusive when non-nil), already positioned at the first
// item whose key is >= start.
func (w *Reader) RangeIterator(start, end []byte) store.KVIterator {
	it := &Iterator{
		t:     w.t,
		start: start,
		end:   end,
	}
	it.restart(&Item{k: start})
	return it
}
// Close is a no-op for this reader and always returns nil.
func (w *Reader) Close() error {
return nil
}
================================================
FILE: index/upsidedown/store/gtreap/store.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package gtreap provides an in-memory implementation of the
// KVStore interfaces using the gtreap balanced-binary treap,
// copy-on-write data structure.
package gtreap
import (
"bytes"
"fmt"
"os"
"sync"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/gtreap"
store "github.com/blevesearch/upsidedown_store_api"
)
// Name is the identifier under which this package's init function registers
// the store constructor.
const Name = "gtreap"
// Store is the in-memory, treap-backed KVStore implementation.
type Store struct {
// m is held while the current treap is read (Reader) or replaced (batch
// execution).
m sync.Mutex
// t is the current treap holding all items.
t *gtreap.Treap
// mo is the merge operator supplied to New.
mo store.MergeOperator
}
// Item is a key/value pair stored in the treap; items are ordered by k
// (see itemCompare).
type Item struct {
// k is the key.
k []byte
// v is the value.
v []byte
}
// itemCompare orders two *Item values by comparing their keys bytewise.
// Both arguments must be *Item; anything else panics on the type assertion.
func itemCompare(a, b interface{}) int {
	leftKey := a.(*Item).k
	rightKey := b.(*Item).k
	return bytes.Compare(leftKey, rightKey)
}
// New creates an empty treap-backed store using mo as its merge operator.
// config must contain a string "path" entry and that entry must be the empty
// string: a missing or non-string path yields an error, and a non-empty path
// yields os.ErrInvalid.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	path, isString := config["path"].(string)
	switch {
	case !isString:
		return nil, fmt.Errorf("must specify path")
	case path != "":
		return nil, os.ErrInvalid
	}

	return &Store{
		t:  gtreap.NewTreap(itemCompare),
		mo: mo,
	}, nil
}
// Close is a no-op for this store and always returns nil.
func (s *Store) Close() error {
return nil
}
// Reader returns a Reader bound to the treap that is current at the time of
// the call; the treap pointer is captured while holding the store mutex. The
// error is always nil.
func (s *Store) Reader() (store.KVReader, error) {
	s.m.Lock()
	defer s.m.Unlock()
	return &Reader{t: s.t}, nil
}
// Writer returns a new Writer that refers back to this store. The error is
// always nil.
func (s *Store) Writer() (store.KVWriter, error) {
return &Writer{s: s}, nil
}
// init registers the New constructor with the registry under Name and panics
// if the registry reports an error.
func init() {
	if err := registry.RegisterKVStore(Name, New); err != nil {
		panic(err)
	}
}
================================================
FILE: index/upsidedown/store/gtreap/store_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package gtreap
import (
"testing"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/upsidedown_store_api/test"
)
// open builds a store via New with an empty "path", failing the test
// immediately if New returns an error.
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
	config := map[string]interface{}{
		"path": "",
	}
	kvs, err := New(mo, config)
	if err != nil {
		t.Fatal(err)
	}
	return kvs
}
// cleanup closes the store and fails the test if Close returns an error.
func cleanup(t *testing.T, s store.KVStore) {
	if err := s.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestGTreapKVCrud runs the shared CommonTestKVCrud suite against a store
// obtained from open (no merge operator); cleanup runs when the test returns.
func TestGTreapKVCrud(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestKVCrud(t, s)
}
// TestGTreapReaderIsolation runs the shared CommonTestReaderIsolation suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGTreapReaderIsolation(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderIsolation(t, s)
}
// TestGTreapReaderOwnsGetBytes runs the shared CommonTestReaderOwnsGetBytes
// suite against a store obtained from open; cleanup runs when the test returns.
func TestGTreapReaderOwnsGetBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderOwnsGetBytes(t, s)
}
// TestGTreapWriterOwnsBytes runs the shared CommonTestWriterOwnsBytes suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGTreapWriterOwnsBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestWriterOwnsBytes(t, s)
}
// TestGTreapPrefixIterator runs the shared CommonTestPrefixIterator suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGTreapPrefixIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIterator(t, s)
}
// TestGTreapPrefixIteratorSeek runs the shared CommonTestPrefixIteratorSeek
// suite against a store obtained from open; cleanup runs when the test returns.
func TestGTreapPrefixIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIteratorSeek(t, s)
}
// TestGTreapRangeIterator runs the shared CommonTestRangeIterator suite
// against a store obtained from open; cleanup runs when the test returns.
func TestGTreapRangeIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIterator(t, s)
}
// TestGTreapRangeIteratorSeek runs the shared CommonTestRangeIteratorSeek
// suite against a store obtained from open; cleanup runs when the test returns.
func TestGTreapRangeIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIteratorSeek(t, s)
}
// TestGTreapMerge runs the shared CommonTestMerge suite against a store opened
// with a test.TestMergeCounter merge operator; cleanup runs when the test
// returns.
func TestGTreapMerge(t *testing.T) {
s := open(t, &test.TestMergeCounter{})
defer cleanup(t, s)
test.CommonTestMerge(t, s)
}
================================================
FILE: index/upsidedown/store/gtreap/writer.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package gtreap provides an in-memory implementation of the
// KVStore interfaces using the gtreap balanced-binary treap,
// copy-on-write data structure.
package gtreap
import (
"fmt"
"math/rand"
"github.com/blevesearch/upsidedown_store_api"
)
// Writer creates and executes batches against a treap-backed Store.
type Writer struct {
// s is the store the writer mutates; Close sets it to nil.
s *Store
}
// NewBatch returns an emulated batch created with the store's merge operator.
func (w *Writer) NewBatch() store.KVBatch {
return store.NewEmulatedBatch(w.s.mo)
}
// NewBatchEx returns a newly allocated byte slice of length options.TotalBytes
// together with a batch from NewBatch. The returned error is always nil.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
return make([]byte, options.TotalBytes), w.NewBatch(), nil
}
// ExecuteBatch applies batch to the store. batch must be a
// *store.EmulatedBatch; any other type yields an error.
//
// Pending merges are resolved first: each key's existing value (nil when the
// key is absent) is combined with its operands through the store's merge
// operator and the result is upserted. The batch's ordinary operations are
// applied afterwards, where a nil value means delete.
//
// The store mutex is held for the whole call and is released on every return
// path, including a merge-operator failure. The new treap is built from the
// treap values returned by Upsert/Delete and only installed in the store once
// every step has succeeded, so a failed merge leaves the store unchanged.
func (w *Writer) ExecuteBatch(batch store.KVBatch) error {
	emulatedBatch, ok := batch.(*store.EmulatedBatch)
	if !ok {
		return fmt.Errorf("wrong type of batch")
	}

	w.s.m.Lock()
	// Deferred so the early return below cannot leave the store locked.
	defer w.s.m.Unlock()

	t := w.s.t
	for k, mergeOps := range emulatedBatch.Merger.Merges {
		kb := []byte(k)
		var existingVal []byte
		if existingItem := t.Get(&Item{k: kb}); existingItem != nil {
			existingVal = existingItem.(*Item).v
		}
		mergedVal, fullMergeOk := w.s.mo.FullMerge(kb, existingVal, mergeOps)
		if !fullMergeOk {
			return fmt.Errorf("merge operator returned failure")
		}
		t = t.Upsert(&Item{k: kb, v: mergedVal}, rand.Int())
	}

	for _, op := range emulatedBatch.Ops {
		if op.V != nil {
			t = t.Upsert(&Item{k: op.K, v: op.V}, rand.Int())
		} else {
			t = t.Delete(&Item{k: op.K})
		}
	}

	// Publish the fully updated treap in one step.
	w.s.t = t
	return nil
}
// Close drops the writer's reference to its store and always returns nil.
func (w *Writer) Close() error {
w.s = nil
return nil
}
================================================
FILE: index/upsidedown/store/metrics/batch.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import store "github.com/blevesearch/upsidedown_store_api"
// Batch wraps another KVBatch and forwards every call to it, timing Merge
// calls with the owning Store's batch-merge timer.
type Batch struct {
// s is the metrics Store whose timers are updated.
s *Store
// o is the wrapped batch; Close sets it to nil.
o store.KVBatch
}
// Set forwards to the wrapped batch's Set.
func (b *Batch) Set(key, val []byte) {
b.o.Set(key, val)
}
// Delete forwards to the wrapped batch's Delete.
func (b *Batch) Delete(key []byte) {
b.o.Delete(key)
}
// Merge forwards to the wrapped batch's Merge, running the call inside the
// store's timerBatchMerge timer.
func (b *Batch) Merge(key, val []byte) {
b.s.timerBatchMerge.Time(func() {
b.o.Merge(key, val)
})
}
// Reset forwards to the wrapped batch's Reset.
func (b *Batch) Reset() {
b.o.Reset()
}
// Close closes the wrapped batch, drops the reference to it, and returns the
// error from the wrapped Close.
func (b *Batch) Close() error {
err := b.o.Close()
// Clear the reference regardless of whether Close succeeded.
b.o = nil
return err
}
================================================
FILE: index/upsidedown/store/metrics/iterator.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import store "github.com/blevesearch/upsidedown_store_api"
// Iterator wraps another KVIterator, timing Seek and Next calls and recording
// Close errors on the owning Store.
type Iterator struct {
// s is the metrics Store whose timers and error list are updated.
s *Store
// o is the wrapped iterator.
o store.KVIterator
}
// Seek forwards to the wrapped iterator's Seek, running the call inside the
// store's timerIteratorSeek timer.
func (i *Iterator) Seek(x []byte) {
i.s.timerIteratorSeek.Time(func() {
i.o.Seek(x)
})
}
// Next forwards to the wrapped iterator's Next, running the call inside the
// store's timerIteratorNext timer.
func (i *Iterator) Next() {
i.s.timerIteratorNext.Time(func() {
i.o.Next()
})
}
// Current returns the wrapped iterator's Current result unchanged (untimed).
func (i *Iterator) Current() ([]byte, []byte, bool) {
return i.o.Current()
}
// Key returns the wrapped iterator's Key result unchanged (untimed).
func (i *Iterator) Key() []byte {
return i.o.Key()
}
// Value returns the wrapped iterator's Value result unchanged (untimed).
func (i *Iterator) Value() []byte {
return i.o.Value()
}
// Valid returns the wrapped iterator's Valid result unchanged (untimed).
func (i *Iterator) Valid() bool {
return i.o.Valid()
}
// Close closes the wrapped iterator. A non-nil error is recorded on the store
// under the operation name "Iterator.Close" and then returned.
func (i *Iterator) Close() error {
	closeErr := i.o.Close()
	if closeErr == nil {
		return nil
	}
	i.s.AddError("Iterator.Close", closeErr, nil)
	return closeErr
}
================================================
FILE: index/upsidedown/store/metrics/metrics_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"bytes"
"encoding/json"
"fmt"
"testing"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
)
// TestMetricsStore checks that New rejects a config with no
// kvStoreName_actual and one naming an unknown store, and that a store
// wrapping gtreap produces non-empty, parseable WriteJSON output and
// non-empty WriteCSVHeader / WriteCSV output.
func TestMetricsStore(t *testing.T) {
	if _, err := New(nil, map[string]interface{}{}); err == nil {
		t.Errorf("expected err when bad config")
	}

	unknownStore := map[string]interface{}{
		"kvStoreName_actual": "some-invalid-kvstore-name",
	}
	if _, err := New(nil, unknownStore); err == nil {
		t.Errorf("expected err when unknown kvStoreName_actual")
	}

	s, err := New(nil, map[string]interface{}{
		"kvStoreName_actual": gtreap.Name,
		"path":               "",
	})
	if err != nil {
		t.Fatal(err)
	}
	ms := s.(*Store)

	jsonOut := bytes.NewBuffer(nil)
	if err = ms.WriteJSON(jsonOut); err != nil {
		t.Fatal(err)
	}
	if jsonOut.Len() <= 0 {
		t.Errorf("expected some output from WriteJSON")
	}

	var m map[string]interface{}
	if err = json.Unmarshal(jsonOut.Bytes(), &m); err != nil {
		t.Errorf("expected WriteJSON to be unmarshallable")
	}
	if len(m) == 0 {
		t.Errorf("expected some entries")
	}

	headerOut := bytes.NewBuffer(nil)
	ms.WriteCSVHeader(headerOut)
	if headerOut.Len() <= 0 {
		t.Errorf("expected some output from WriteCSVHeader")
	}

	csvOut := bytes.NewBuffer(nil)
	ms.WriteCSV(csvOut)
	if csvOut.Len() <= 0 {
		t.Errorf("expected some output from WriteCSV")
	}
}
// TestErrors records three errors on a metrics store wrapping gtreap and
// verifies that WriteJSON emits them, in insertion order, under the "Errors"
// key with the expected Op, Err, Time and Key fields.
//
// Preconditions that later statements depend on (the *Store assertion, the
// decoded JSON shape, the number of errors) abort the test with Fatalf so a
// failure is reported as a test failure rather than a nil dereference or
// index-out-of-range panic.
func TestErrors(t *testing.T) {
	s, err := New(nil, map[string]interface{}{
		"kvStoreName_actual": gtreap.Name,
		"path":               "",
	})
	if err != nil {
		t.Fatal(err)
	}

	x, ok := s.(*Store)
	if !ok {
		t.Fatalf("expecting a Store")
	}

	x.AddError("foo", fmt.Errorf("Foo"), []byte("fooKey"))
	x.AddError("bar", fmt.Errorf("Bar"), nil)
	x.AddError("baz", fmt.Errorf("Baz"), []byte("bazKey"))

	b := bytes.NewBuffer(nil)
	if err = x.WriteJSON(b); err != nil {
		t.Fatal(err)
	}

	var m map[string]interface{}
	if err = json.Unmarshal(b.Bytes(), &m); err != nil {
		t.Fatalf("expected unmarshallable writeJSON, err: %v, b: %s",
			err, b.Bytes())
	}

	errorsi, ok := m["Errors"]
	if !ok || errorsi == nil {
		t.Fatalf("expected errorsi")
	}
	errors, ok := errorsi.([]interface{})
	if !ok || errors == nil {
		t.Fatalf("expected errorsi is array")
	}
	if len(errors) != 3 {
		t.Fatalf("expected errors len 3")
	}

	expected := []struct {
		op, err, key string
	}{
		{"foo", "Foo", "fooKey"},
		{"bar", "Bar", ""},
		{"baz", "Baz", "bazKey"},
	}
	for i, want := range expected {
		e, ok := errors[i].(map[string]interface{})
		if !ok {
			t.Fatalf("expected %s, %#v", want.op, errors[i])
		}
		// Comma-ok assertions: a missing or mistyped field compares as ""
		// and fails the check below instead of panicking.
		op, _ := e["Op"].(string)
		msg, _ := e["Err"].(string)
		when, _ := e["Time"].(string)
		key, keyIsString := e["Key"].(string)
		if op != want.op ||
			msg != want.err ||
			len(when) < 10 ||
			!keyIsString ||
			key != want.key {
			t.Errorf("expected %s, %#v", want.op, e)
		}
	}
}
================================================
FILE: index/upsidedown/store/metrics/reader.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import store "github.com/blevesearch/upsidedown_store_api"
// Reader wraps another KVReader, timing its operations and recording errors
// on the owning Store.
type Reader struct {
// s is the metrics Store whose timers and error list are updated.
s *Store
// o is the wrapped reader.
o store.KVReader
}
// Get forwards to the wrapped reader's Get inside the store's timerReaderGet
// timer. A non-nil error is recorded on the store as "Reader.Get" together
// with the key, and is also returned.
func (r *Reader) Get(key []byte) (v []byte, err error) {
	timedGet := func() {
		v, err = r.o.Get(key)
		if err == nil {
			return
		}
		r.s.AddError("Reader.Get", err, key)
	}
	r.s.timerReaderGet.Time(timedGet)
	return v, err
}
// MultiGet forwards to the wrapped reader's MultiGet inside the store's
// timerReaderMultiGet timer. A non-nil error is recorded on the store as
// "Reader.MultiGet" (without a key) and is also returned.
func (r *Reader) MultiGet(keys [][]byte) (vals [][]byte, err error) {
	timedMultiGet := func() {
		vals, err = r.o.MultiGet(keys)
		if err == nil {
			return
		}
		r.s.AddError("Reader.MultiGet", err, nil)
	}
	r.s.timerReaderMultiGet.Time(timedMultiGet)
	return vals, err
}
// PrefixIterator creates the wrapped reader's prefix iterator inside the
// store's timerReaderPrefixIterator timer and returns it wrapped in a metrics
// Iterator.
func (r *Reader) PrefixIterator(prefix []byte) (i store.KVIterator) {
	build := func() {
		i = &Iterator{s: r.s, o: r.o.PrefixIterator(prefix)}
	}
	r.s.timerReaderPrefixIterator.Time(build)
	return i
}
// RangeIterator creates the wrapped reader's range iterator inside the
// store's timerReaderRangeIterator timer and returns it wrapped in a metrics
// Iterator.
func (r *Reader) RangeIterator(start, end []byte) (i store.KVIterator) {
	build := func() {
		i = &Iterator{s: r.s, o: r.o.RangeIterator(start, end)}
	}
	r.s.timerReaderRangeIterator.Time(build)
	return i
}
// Close closes the wrapped reader. A non-nil error is recorded on the store
// under the operation name "Reader.Close" and then returned.
func (r *Reader) Close() error {
	closeErr := r.o.Close()
	if closeErr == nil {
		return nil
	}
	r.s.AddError("Reader.Close", closeErr, nil)
	return closeErr
}
================================================
FILE: index/upsidedown/store/metrics/stats.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"github.com/blevesearch/bleve/v2/util"
store "github.com/blevesearch/upsidedown_store_api"
)
// stats exposes a metrics Store's timers (and the wrapped store's own stats,
// when available) as a map and as JSON.
type stats struct {
// s is the Store whose timers are reported.
s *Store
}
// statsMap returns a map with a "metrics" entry holding the TimerMap of each
// of the store's timers and, when the wrapped store implements
// store.KVStoreStats, a "kv" entry holding that store's StatsMap.
func (s *stats) statsMap() map[string]interface{} {
	owner := s.s
	timers := map[string]interface{}{
		"reader_get":             TimerMap(owner.timerReaderGet),
		"reader_multi_get":       TimerMap(owner.timerReaderMultiGet),
		"reader_prefix_iterator": TimerMap(owner.timerReaderPrefixIterator),
		"reader_range_iterator":  TimerMap(owner.timerReaderRangeIterator),
		"writer_execute_batch":   TimerMap(owner.timerWriterExecuteBatch),
		"iterator_seek":          TimerMap(owner.timerIteratorSeek),
		"iterator_next":          TimerMap(owner.timerIteratorNext),
		"batch_merge":            TimerMap(owner.timerBatchMerge),
	}

	result := map[string]interface{}{
		"metrics": timers,
	}
	if inner, hasStats := owner.o.(store.KVStoreStats); hasStats {
		result["kv"] = inner.StatsMap()
	}
	return result
}
// MarshalJSON encodes the result of statsMap with util.MarshalJSON.
func (s *stats) MarshalJSON() ([]byte, error) {
	return util.MarshalJSON(s.statsMap())
}
================================================
FILE: index/upsidedown/store/metrics/store.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package metrics provides a bleve.store.KVStore implementation that
// wraps another, real KVStore implementation, and uses go-metrics to
// track runtime performance metrics.
package metrics
import (
"container/list"
"encoding/json"
"fmt"
"io"
"sync"
"time"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
"github.com/blevesearch/go-metrics"
store "github.com/blevesearch/upsidedown_store_api"
)
// Name is the identifier under which this package's init function registers
// the store constructor; New rejects it as the wrapped store's name.
const Name = "metrics"
// Store wraps another KVStore, keeping one timer per instrumented operation
// and a capped list of recent errors.
type Store struct {
// o is the wrapped store built by New from config["kvStoreName_actual"].
o store.KVStore
// One timer per instrumented operation.
timerReaderGet metrics.Timer
timerReaderMultiGet metrics.Timer
timerReaderPrefixIterator metrics.Timer
timerReaderRangeIterator metrics.Timer
timerWriterExecuteBatch metrics.Timer
timerIteratorSeek metrics.Timer
timerIteratorNext metrics.Timer
timerBatchMerge metrics.Timer
m sync.Mutex // Protects the fields that follow.
errors *list.List // Capped list of StoreError's.
// s is the stats view over this store, returned by Stats.
s *stats
}
// New builds a metrics Store around the KV store named by the string config
// entry "kvStoreName_actual". It returns an error when that entry is missing,
// not a string, or empty; when it names this package's own store; when no
// constructor is registered under that name; or when the wrapped store's
// constructor fails. mo and config are passed through to that constructor.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	// A failed assertion leaves actualName empty, which the check below
	// treats the same as an explicitly empty name.
	actualName, _ := config["kvStoreName_actual"].(string)
	if actualName == "" {
		return nil, fmt.Errorf("metrics: missing kvStoreName_actual,"+
			" config: %#v", config)
	}

	// Refuse to wrap ourselves.
	if actualName == Name {
		return nil, fmt.Errorf("metrics: circular kvStoreName_actual")
	}

	constructor := registry.KVStoreConstructorByName(actualName)
	if constructor == nil {
		return nil, fmt.Errorf("metrics: no kv store constructor,"+
			" kvStoreName_actual: %s", actualName)
	}

	wrapped, err := constructor(mo, config)
	if err != nil {
		return nil, err
	}

	ms := &Store{
		o: wrapped,

		timerReaderGet:            metrics.NewTimer(),
		timerReaderMultiGet:       metrics.NewTimer(),
		timerReaderPrefixIterator: metrics.NewTimer(),
		timerReaderRangeIterator:  metrics.NewTimer(),
		timerWriterExecuteBatch:   metrics.NewTimer(),
		timerIteratorSeek:         metrics.NewTimer(),
		timerIteratorNext:         metrics.NewTimer(),
		timerBatchMerge:           metrics.NewTimer(),

		errors: list.New(),
	}
	ms.s = &stats{s: ms}
	return ms, nil
}
// init registers the New constructor with the registry under Name and panics
// if the registry reports an error.
func init() {
	if err := registry.RegisterKVStore(Name, New); err != nil {
		panic(err)
	}
}
// Close closes the wrapped store and returns its error unchanged.
func (s *Store) Close() error {
return s.o.Close()
}
// Reader obtains a reader from the wrapped store and returns it wrapped in a
// metrics Reader. On failure the error is recorded on the store under the
// operation name "Reader" and returned.
func (s *Store) Reader() (store.KVReader, error) {
	inner, err := s.o.Reader()
	if err == nil {
		return &Reader{s: s, o: inner}, nil
	}
	s.AddError("Reader", err, nil)
	return nil, err
}
// Writer obtains a writer from the wrapped store and returns it wrapped in a
// metrics Writer. On failure the error is recorded on the store under the
// operation name "Writer" and returned.
func (s *Store) Writer() (store.KVWriter, error) {
	inner, err := s.o.Writer()
	if err == nil {
		return &Writer{s: s, o: inner}, nil
	}
	s.AddError("Writer", err, nil)
	return nil, err
}
// Metric specific code below:
// MaxErrors is the cap on the store's error list: AddError evicts the oldest
// entries until fewer than MaxErrors remain before appending a new one.
const MaxErrors = 100
// StoreError is one recorded failure, as created by AddError.
type StoreError struct {
// Time is when the error was recorded, formatted as time.RFC3339Nano.
Time string
// Op is the operation name supplied by the caller of AddError.
Op string
// Err is the error rendered with the %v verb.
Err string
// Key is the key involved, converted to a string; empty when none was given.
Key string
}
// AddError appends a StoreError describing op, err and key (timestamped with
// the current time in RFC3339Nano form) to the store's error list, first
// dropping the oldest entries while the list holds MaxErrors or more.
func (s *Store) AddError(op string, err error, key []byte) {
	entry := &StoreError{
		Time: time.Now().Format(time.RFC3339Nano),
		Op:   op,
		Err:  fmt.Sprintf("%v", err),
		Key:  string(key),
	}

	s.m.Lock()
	defer s.m.Unlock()

	// Make room at the front before appending at the back.
	for s.errors.Len() >= MaxErrors {
		oldest := s.errors.Front()
		s.errors.Remove(oldest)
	}
	s.errors.PushBack(entry)
}
// WriteJSON writes a single JSON object to w containing one member per timer
// (rendered by WriteTimerJSON), an "Errors" array with the recorded
// StoreErrors, and - when the wrapped store implements store.KVStoreStats - a
// "store" member holding that store's marshalled Stats().
//
// The first error returned by w.Write (or by marshalling the wrapped store's
// stats) aborts the output and is returned. An individual error entry that
// cannot be marshalled is skipped; separators are only written between
// entries that were actually emitted, so the array stays well-formed.
//
// NOTE(review): WriteTimerJSON is called as a statement, so any result it may
// return is not inspected here - confirm against its definition.
func (s *Store) WriteJSON(w io.Writer) (err error) {
	// Each timer is preceded by its literal member prefix; the first prefix
	// also opens the object.
	timers := []struct {
		prefix string
		timer  metrics.Timer
	}{
		{`{"TimerReaderGet":`, s.timerReaderGet},
		{`,"TimerReaderMultiGet":`, s.timerReaderMultiGet},
		{`,"TimerReaderPrefixIterator":`, s.timerReaderPrefixIterator},
		{`,"TimerReaderRangeIterator":`, s.timerReaderRangeIterator},
		{`,"TimerWriterExecuteBatch":`, s.timerWriterExecuteBatch},
		{`,"TimerIteratorSeek":`, s.timerIteratorSeek},
		{`,"TimerIteratorNext":`, s.timerIteratorNext},
		{`,"TimerBatchMerge":`, s.timerBatchMerge},
	}
	for _, t := range timers {
		if _, err = w.Write([]byte(t.prefix)); err != nil {
			return err
		}
		WriteTimerJSON(w, t.timer)
	}

	if _, err = w.Write([]byte(`,"Errors":[`)); err != nil {
		return err
	}

	// The error list is guarded by s.m; the lock is held until return.
	s.m.Lock()
	defer s.m.Unlock()

	written := 0
	for e := s.errors.Front(); e != nil; e = e.Next() {
		se, ok := e.Value.(*StoreError)
		if !ok || se == nil {
			continue
		}
		// Marshal before emitting the separator so a failed entry leaves no
		// dangling comma behind.
		buf, marshalErr := util.MarshalJSON(se)
		if marshalErr != nil {
			continue
		}
		if written > 0 {
			if _, err = w.Write([]byte(",")); err != nil {
				return err
			}
		}
		if _, err = w.Write(buf); err != nil {
			return err
		}
		written++
	}

	if _, err = w.Write([]byte(`]`)); err != nil {
		return err
	}

	// see if the underlying implementation has its own stats
	if o, ok := s.o.(store.KVStoreStats); ok {
		var storeBytes []byte
		storeBytes, err = util.MarshalJSON(o.Stats())
		if err != nil {
			return err
		}
		if _, err = fmt.Fprintf(w, `, "store": %s`, string(storeBytes)); err != nil {
			return err
		}
	}

	if _, err = w.Write([]byte(`}`)); err != nil {
		return err
	}
	return nil
}
// WriteCSVHeader writes the CSV header columns for the store's timers to w by
// calling WriteTimerCSVHeader once per timer, in the order: reader get,
// reader prefix iterator, reader range iterator, writer execute batch,
// iterator seek, iterator next, batch merge.
//
// NOTE(review): the reader multi-get timer has no column here, and WriteCSV
// emits no value for it either; adding one must be done in both places.
func (s *Store) WriteCSVHeader(w io.Writer) {
	names := []string{
		"TimerReaderGet",
		"TimerReaderPrefixIterator",
		"TimerReaderRangeIterator",
		// Previously misspelled as "TimerWtierExecuteBatch".
		"TimerWriterExecuteBatch",
		"TimerIteratorSeek",
		"TimerIteratorNext",
		"TimerBatchMerge",
	}
	for _, name := range names {
		WriteTimerCSVHeader(w, name)
	}
}
// WriteCSV writes the store's timers to w by calling WriteTimerCSV once per
// timer, in the same order WriteCSVHeader uses: reader get, reader prefix
// iterator, reader range iterator, writer execute batch, iterator seek,
// iterator next, batch merge.
// NOTE(review): timerReaderMultiGet is not written here - confirm whether the
// omission is intentional.
func (s *Store) WriteCSV(w io.Writer) {
WriteTimerCSV(w, s.timerReaderGet)
WriteTimerCSV(w, s.timerReaderPrefixIterator)
WriteTimerCSV(w, s.timerReaderRangeIterator)
WriteTimerCSV(w, s.timerWriterExecuteBatch)
WriteTimerCSV(w, s.timerIteratorSeek)
WriteTimerCSV(w, s.timerIteratorNext)
WriteTimerCSV(w, s.timerBatchMerge)
}
// Stats returns the store's stats view as a json.Marshaler.
func (s *Store) Stats() json.Marshaler {
return s.s
}
// StatsMap returns the map produced by the store's stats view (see
// stats.statsMap).
func (s *Store) StatsMap() map[string]interface{} {
return s.s.statsMap()
}
================================================
FILE: index/upsidedown/store/metrics/store_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"testing"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/upsidedown_store_api/test"
)
// open builds a metrics store wrapping the gtreap store (empty "path"),
// failing the test immediately if New returns an error.
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
	config := map[string]interface{}{
		"kvStoreName_actual": gtreap.Name,
		"path":               "",
	}
	kvs, err := New(mo, config)
	if err != nil {
		t.Fatal(err)
	}
	return kvs
}
// cleanup closes the store and fails the test if Close returns an error.
func cleanup(t *testing.T, s store.KVStore) {
	if err := s.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestMetricsKVCrud runs the shared CommonTestKVCrud suite against a store
// obtained from open (no merge operator); cleanup runs when the test returns.
func TestMetricsKVCrud(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestKVCrud(t, s)
}
// TestMetricsReaderIsolation runs the shared CommonTestReaderIsolation suite
// against a store obtained from open; cleanup runs when the test returns.
func TestMetricsReaderIsolation(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderIsolation(t, s)
}
// TestMetricsReaderOwnsGetBytes runs the shared CommonTestReaderOwnsGetBytes
// suite against a store obtained from open; cleanup runs when the test returns.
func TestMetricsReaderOwnsGetBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestReaderOwnsGetBytes(t, s)
}
// TestMetricsWriterOwnsBytes runs the shared CommonTestWriterOwnsBytes suite
// against a store obtained from open; cleanup runs when the test returns.
func TestMetricsWriterOwnsBytes(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestWriterOwnsBytes(t, s)
}
// TestMetricsPrefixIterator runs the shared CommonTestPrefixIterator suite
// against a store obtained from open; cleanup runs when the test returns.
func TestMetricsPrefixIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIterator(t, s)
}
// TestMetricsPrefixIteratorSeek runs the shared CommonTestPrefixIteratorSeek
// suite against a store obtained from open; cleanup runs when the test returns.
func TestMetricsPrefixIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestPrefixIteratorSeek(t, s)
}
// TestMetricsRangeIterator runs the shared CommonTestRangeIterator suite
// against a store obtained from open; cleanup runs when the test returns.
func TestMetricsRangeIterator(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIterator(t, s)
}
// TestMetricsRangeIteratorSeek runs the shared CommonTestRangeIteratorSeek
// suite against a store obtained from open; cleanup runs when the test returns.
func TestMetricsRangeIteratorSeek(t *testing.T) {
s := open(t, nil)
defer cleanup(t, s)
test.CommonTestRangeIteratorSeek(t, s)
}
// TestMetricsMerge runs the shared CommonTestMerge suite against a store
// opened with a test.TestMergeCounter merge operator.
func TestMetricsMerge(t *testing.T) {
	kv := open(t, &test.TestMergeCounter{})
	defer cleanup(t, kv)

	test.CommonTestMerge(t, kv)
}
================================================
FILE: index/upsidedown/store/metrics/util.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"fmt"
"io"
"math"
"github.com/blevesearch/go-metrics"
)
// NOTE: This is copy & pasted from cbft as otherwise there
// would be an import cycle.

// timerPercentiles lists the quantiles requested from a timer snapshot by
// TimerMap, WriteTimerJSON and WriteTimerCSV. Those functions index the
// result positionally, so the order here must match their labels
// (median, 75%, 95%, 99%, 99.9%).
var timerPercentiles = []float64{0.5, 0.75, 0.95, 0.99, 0.999}
// TimerMap snapshots timer and returns its statistics as a nested map.
// "count", "min" and "max" are always present; "mean" and "stddev" are
// included only when finite. "percentiles" and "rates" hold sub-maps from
// which NaN and infinite entries are likewise left out.
func TimerMap(timer metrics.Timer) map[string]interface{} {
	snap := timer.Snapshot()

	// finiteOnly maps names[i] to values[i], skipping NaN/Inf values.
	finiteOnly := func(names []string, values []float64) map[string]interface{} {
		out := make(map[string]interface{})
		for idx, val := range values {
			if isNanOrInf(val) {
				continue
			}
			out[names[idx]] = val
		}
		return out
	}

	percentiles := finiteOnly(
		[]string{"median", "75%", "95%", "99%", "99.9%"},
		snap.Percentiles(timerPercentiles),
	)
	rates := finiteOnly(
		[]string{"1-min", "5-min", "15-min", "mean"},
		[]float64{snap.Rate1(), snap.Rate5(), snap.Rate15(), snap.RateMean()},
	)

	result := make(map[string]interface{})
	result["count"] = snap.Count()
	result["min"] = snap.Min()
	result["max"] = snap.Max()
	if mean := snap.Mean(); !isNanOrInf(mean) {
		result["mean"] = mean
	}
	if stddev := snap.StdDev(); !isNanOrInf(stddev) {
		result["stddev"] = stddev
	}
	result["percentiles"] = percentiles
	result["rates"] = rates
	return result
}
// isNanOrInf reports whether v is NaN or an infinity of either sign.
func isNanOrInf(v float64) bool {
	return math.IsNaN(v) || math.IsInf(v, 0)
}
// WriteTimerJSON snapshots timer and writes its statistics to w as a single
// JSON object with "count", "min", "max", "mean", "stddev", a
// "percentiles" object and a "rates" object. Numbers are padded to a fixed
// width. Errors returned by w are ignored.
func WriteTimerJSON(w io.Writer, timer metrics.Timer) {
	snap := timer.Snapshot()
	pcts := snap.Percentiles(timerPercentiles)

	// emit writes one formatted fragment with a single value.
	emit := func(format string, v interface{}) {
		fmt.Fprintf(w, format, v)
	}

	emit(`{"count":%9d,`, snap.Count())
	emit(`"min":%9d,`, snap.Min())
	emit(`"max":%9d,`, snap.Max())
	emit(`"mean":%12.2f,`, snap.Mean())
	emit(`"stddev":%12.2f,`, snap.StdDev())

	fmt.Fprintf(w, `"percentiles":{`)
	emit(`"median":%12.2f,`, pcts[0])
	emit(`"75%%":%12.2f,`, pcts[1])
	emit(`"95%%":%12.2f,`, pcts[2])
	emit(`"99%%":%12.2f,`, pcts[3])
	emit(`"99.9%%":%12.2f},`, pcts[4])

	fmt.Fprintf(w, `"rates":{`)
	emit(`"1-min":%12.2f,`, snap.Rate1())
	emit(`"5-min":%12.2f,`, snap.Rate5())
	emit(`"15-min":%12.2f,`, snap.Rate15())
	emit(`"mean":%12.2f}}`, snap.RateMean())
}
// WriteTimerCSVHeader writes the comma-separated column names for one
// timer to w. Every name is "<prefix>-<column>", in the same order that
// WriteTimerCSV emits values, and no comma follows the last column.
// Errors returned by w are ignored.
func WriteTimerCSVHeader(w io.Writer, prefix string) {
	// All columns except the final one, each followed by a comma.
	columns := []string{
		"count",
		"min",
		"max",
		"mean",
		"stddev",
		"percentile-50%",
		"percentile-75%",
		"percentile-95%",
		"percentile-99%",
		"percentile-99.9%",
		"rate-1-min",
		"rate-5-min",
		"rate-15-min",
	}
	for _, column := range columns {
		fmt.Fprintf(w, "%s-%s,", prefix, column)
	}
	fmt.Fprintf(w, "%s-rate-mean", prefix)
}
// WriteTimerCSV snapshots timer and writes one comma-separated row of its
// statistics to w, in the column order produced by WriteTimerCSVHeader.
// No comma follows the final value. Errors returned by w are ignored.
func WriteTimerCSV(w io.Writer, timer metrics.Timer) {
	snap := timer.Snapshot()
	pcts := snap.Percentiles(timerPercentiles)

	// Each helper writes a single value followed by a comma.
	intCol := func(v interface{}) { fmt.Fprintf(w, `%d,`, v) }
	floatCol := func(v interface{}) { fmt.Fprintf(w, `%f,`, v) }

	intCol(snap.Count())
	intCol(snap.Min())
	intCol(snap.Max())
	floatCol(snap.Mean())
	floatCol(snap.StdDev())
	for idx := 0; idx < 5; idx++ {
		floatCol(pcts[idx])
	}
	floatCol(snap.Rate1())
	floatCol(snap.Rate5())
	floatCol(snap.Rate15())
	fmt.Fprintf(w, `%f`, snap.RateMean())
}
================================================
FILE: index/upsidedown/store/metrics/writer.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package metrics
import (
"fmt"
store "github.com/blevesearch/upsidedown_store_api"
)
// Writer pairs an underlying store.KVWriter with the metrics Store that
// its methods report errors and timings to.
type Writer struct {
// s is the owning metrics Store; methods call s.AddError and use its timers.
s *Store
// o is the wrapped writer that performs the actual work.
o store.KVWriter
}
// Close closes the wrapped writer. A failure is recorded on the Store via
// AddError and then returned to the caller.
func (w *Writer) Close() error {
	closeErr := w.o.Close()
	if closeErr == nil {
		return nil
	}
	w.s.AddError("Writer.Close", closeErr, nil)
	return closeErr
}
// NewBatch returns a Batch wrapping a new batch from the underlying
// writer.
func (w *Writer) NewBatch() store.KVBatch {
	inner := w.o.NewBatch()
	return &Batch{s: w.s, o: inner}
}
// NewBatchEx forwards options to the underlying writer's NewBatchEx and
// wraps the batch it returns. The buffer is passed through unchanged. On
// error, nil values are returned together with the error.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	buf, inner, err := w.o.NewBatchEx(options)
	if err == nil {
		return buf, &Batch{s: w.s, o: inner}, nil
	}
	return nil, nil, err
}
// ExecuteBatch runs b against the underlying writer inside the Store's
// timerWriterExecuteBatch timer. b must be a *Batch from this package;
// any other type is rejected with an error. A failure from the underlying
// writer is recorded via AddError and returned.
func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) {
	wrapped, isBatch := b.(*Batch)
	if !isBatch {
		return fmt.Errorf("wrong type of batch")
	}

	run := func() {
		if err = w.o.ExecuteBatch(wrapped.o); err != nil {
			w.s.AddError("Writer.ExecuteBatch", err, nil)
		}
	}
	w.s.timerWriterExecuteBatch.Time(run)
	return err
}
================================================
FILE: index/upsidedown/store/moss/batch.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"github.com/couchbase/moss"
store "github.com/blevesearch/upsidedown_store_api"
)
// Batch collects mutations destined for a moss collection. Set and Delete
// are applied to the wrapped moss.Batch, while Merge calls are recorded on
// an EmulatedMerge.
type Batch struct {
// store is the owning Store; used for logging and, in Reset, to create a new moss batch.
store *Store
// merge receives the operands passed to Merge.
merge *store.EmulatedMerge
// batch is the underlying moss batch that Set/Delete write to.
batch moss.Batch
buf []byte // Non-nil when using pre-alloc'ed / NewBatchEx().
// bufUsed is the running sum of key and value lengths added while buf is non-nil.
bufUsed int
}
// Set records a key/value write on the underlying moss batch. When the
// batch has a pre-allocated buffer, the AllocSet variant is used and
// bufUsed grows by the key and value lengths. Errors are logged through
// the store, not returned.
func (b *Batch) Set(key, val []byte) {
	var err error
	switch {
	case b.buf == nil:
		err = b.batch.Set(key, val)
	default:
		b.bufUsed += len(key) + len(val)
		err = b.batch.AllocSet(key, val)
	}
	if err == nil {
		return
	}
	b.store.Logf("bleve moss batch.Set err: %v", err)
}
// Delete records a key deletion on the underlying moss batch. When the
// batch has a pre-allocated buffer, the AllocDel variant is used and
// bufUsed grows by the key length. Errors are logged through the store,
// not returned.
func (b *Batch) Delete(key []byte) {
	var err error
	switch {
	case b.buf == nil:
		err = b.batch.Del(key)
	default:
		b.bufUsed += len(key)
		err = b.batch.AllocDel(key)
	}
	if err == nil {
		return
	}
	b.store.Logf("bleve moss batch.Delete err: %v", err)
}
// Merge records a merge operand for key on the batch's EmulatedMerge. The
// bufUsed counter is updated first when a pre-allocated buffer is in use.
func (b *Batch) Merge(key, val []byte) {
	if usingPrealloc := b.buf != nil; usingPrealloc {
		b.bufUsed = b.bufUsed + len(key) + len(val)
	}
	b.merge.Merge(key, val)
}
// Reset discards the current contents by closing the batch and replacing
// it with a fresh moss batch and a new EmulatedMerge. Any pre-allocated
// buffer association is dropped.
//
// Reset has no error return, so failures are reported through the store's
// logger. If closing fails, or if the replacement moss batch cannot be
// created, the Batch is left closed (its batch and merge fields are nil)
// and must not be used for further writes.
func (b *Batch) Reset() {
	if err := b.Close(); err != nil {
		b.store.Logf("bleve moss batch.Close err: %v", err)
		return
	}

	batch, err := b.store.ms.NewBatch(0, 0)
	if err != nil {
		// Log the failure so there is a diagnostic for the now-unusable
		// batch, instead of dropping the error silently.
		b.store.Logf("bleve moss batch.Reset NewBatch err: %v", err)
		return
	}

	b.batch = batch
	b.merge = store.NewEmulatedMerge(b.store.mo)
	b.buf = nil
	b.bufUsed = 0
}
// Close releases the underlying moss batch and drops the merge state.
//
// Close is safe to call more than once, and after a Reset that failed to
// create a replacement batch: when there is no underlying batch left it
// returns nil rather than invoking Close on a nil moss.Batch.
func (b *Batch) Close() error {
	b.merge = nil
	if b.batch == nil {
		// Already closed; nothing left to release.
		return nil
	}
	err := b.batch.Close()
	b.batch = nil
	return err
}
================================================
FILE: index/upsidedown/store/moss/iterator.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"github.com/couchbase/moss"
)
// Iterator adapts a moss.Iterator to the bleve KV iterator methods
// (Seek, Next, Current, Key, Value, Valid, Close). The entry at the
// current position is cached in k, v and err after every move.
type Iterator struct {
// store is the owning Store as supplied by the Reader that built the iterator.
store *Store
// ss is the snapshot being iterated; cleared by Close.
ss moss.Snapshot
// iter is the underlying moss iterator; cleared by Close.
iter moss.Iterator
// start and end are the bounds the iterator was created with.
start []byte
end []byte
// k, v and err hold the result of the most recent iter.Current() call.
k []byte
v []byte
err error
}
// Seek repositions the underlying iterator at seekToKey and refreshes the
// cached current entry. The error returned by SeekTo itself is ignored;
// validity is determined by the subsequent Current call.
func (x *Iterator) Seek(seekToKey []byte) {
	_ = x.iter.SeekTo(seekToKey)
	x.current()
}
// Next advances the underlying iterator and refreshes the cached current
// entry. The error returned by the advance itself is ignored; validity is
// determined by the subsequent Current call.
func (x *Iterator) Next() {
	_ = x.iter.Next()
	x.current()
}
// Current returns the cached key and value together with a flag that is
// true when the last positioning call completed without error.
func (x *Iterator) Current() ([]byte, []byte, bool) {
	valid := x.err == nil
	return x.k, x.v, valid
}
// Key returns the cached key, or nil when the iterator is not valid.
func (x *Iterator) Key() []byte {
	if x.err == nil {
		return x.k
	}
	return nil
}
// Value returns the cached value, or nil when the iterator is not valid.
func (x *Iterator) Value() []byte {
	if x.err == nil {
		return x.v
	}
	return nil
}
// Valid reports whether the iterator is positioned on an entry, i.e. the
// last positioning call recorded no error.
func (x *Iterator) Valid() bool {
	if x.err != nil {
		return false
	}
	return true
}
// Close releases the underlying moss iterator, if any, and returns its
// Close error. The snapshot reference and cached entry are cleared and
// the iterator is marked done, so Valid reports false afterwards.
func (x *Iterator) Close() error {
	x.ss = nil

	var closeErr error
	if inner := x.iter; inner != nil {
		closeErr = inner.Close()
		x.iter = nil
	}

	x.k, x.v = nil, nil
	x.err = moss.ErrIteratorDone
	return closeErr
}
// current refreshes the cached key, value and error from the underlying
// moss iterator's present position.
func (x *Iterator) current() {
	key, val, err := x.iter.Current()
	x.k, x.v, x.err = key, val, err
}
================================================
FILE: index/upsidedown/store/moss/lower.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package moss provides a KVStore implementation based on the
// github.com/couchbase/moss library.
package moss
import (
"fmt"
"os"
"sync"
"github.com/couchbase/moss"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
store "github.com/blevesearch/upsidedown_store_api"
)
// initLowerLevelStore prepares the store that sits beneath a moss
// collection.
//
// Entries of config that are absent from lowerLevelStoreConfig are copied
// into it (the map is created when nil and otherwise modified in place).
// The name "mossStore" is delegated to InitMossStore. Any other name is
// resolved through the bleve KV store registry, and the resulting store is
// wrapped in an llStore whose update method serves as the moss
// lower-level update callback.
//
// It returns the initial lower-level snapshot, the update callback, the
// opened KVStore and an optional stats function (nil on the registry
// path), or an error.
func initLowerLevelStore(
	config map[string]interface{},
	lowerLevelStoreName string,
	lowerLevelStoreConfig map[string]interface{},
	lowerLevelMaxBatchSize uint64,
	options moss.CollectionOptions,
) (moss.Snapshot, moss.LowerLevelUpdate, store.KVStore, statsFunc, error) {
	if lowerLevelStoreConfig == nil {
		lowerLevelStoreConfig = make(map[string]interface{})
	}

	// Inherit every top-level setting the lower-level config does not
	// already define.
	for key, val := range config {
		if _, overridden := lowerLevelStoreConfig[key]; overridden {
			continue
		}
		lowerLevelStoreConfig[key] = val
	}

	if lowerLevelStoreName == "mossStore" {
		return InitMossStore(lowerLevelStoreConfig, options)
	}

	newKVStore := registry.KVStoreConstructorByName(lowerLevelStoreName)
	if newKVStore == nil {
		return nil, nil, nil, nil, fmt.Errorf("moss store, initLowerLevelStore,"+
			" could not find lower level store: %s", lowerLevelStoreName)
	}

	kvStore, err := newKVStore(options.MergeOperator, lowerLevelStoreConfig)
	if err != nil {
		return nil, nil, nil, nil, err
	}

	lower := &llStore{
		kvStore:  kvStore,
		config:   config,
		llConfig: lowerLevelStoreConfig,
		logf:     options.Log,
	}

	update := func(ssHigher moss.Snapshot) (moss.Snapshot, error) {
		return lower.update(ssHigher, lowerLevelMaxBatchSize)
	}

	// A nil higher snapshot copies nothing and just yields the first
	// lower-level snapshot, which takes the first reference on lower.
	initial, err := update(nil)
	if err != nil {
		_ = kvStore.Close()
		return nil, nil, nil, nil, err
	}

	return initial, update, kvStore, nil, nil
}
// ------------------------------------------------
// llStore is a lower level store and provides ref-counting around a
// bleve store.KVStore. The wrapped store is closed when decRef brings the
// count to zero or below.
type llStore struct {
// kvStore is the wrapped bleve KV store.
kvStore store.KVStore
// config is the top-level configuration passed to initLowerLevelStore.
config map[string]interface{}
// llConfig is the lower-level configuration after merging in config.
llConfig map[string]interface{}
// logf is the logging callback taken from the moss collection options.
logf func(format string, a ...interface{})
m sync.Mutex // Protects fields that follow.
refs int
}
// llSnapshot represents a lower-level snapshot, wrapping a bleve
// store.KVReader, and implements the moss.Snapshot interface. It is
// reference counted: when decRef brings the count to zero or below, the
// reader is closed and the reference on the llStore is released.
type llSnapshot struct {
llStore *llStore // Holds 1 refs on the llStore.
// kvReader serves Get and StartIterator; set to nil once released.
kvReader store.KVReader
// childSnapshots maps child collection names to their snapshots.
// NOTE(review): nothing in the visible code populates this map.
childSnapshots map[string]*llSnapshot
m sync.Mutex // Protects fields that follow.
refs int
}
// llIterator represents a lower-level iterator, wrapping a bleve
// store.KVIterator, and implements the moss.Iterator interface.
type llIterator struct {
llSnapshot *llSnapshot // Holds 1 refs on the llSnapshot.
// Some lower-level KVReader implementations need a separate
// KVReader clone, due to KVReader single-threaded'ness.
// kvReader is that clone when one was created (nil otherwise) and is
// closed together with the iterator.
kvReader store.KVReader
// kvIterator is the wrapped bleve iterator that all calls delegate to.
kvIterator store.KVIterator
}
// readerSource is implemented by KVReaders that can hand out a further
// KVReader. llSnapshot.Get and llSnapshot.StartIterator use such a
// dedicated reader when the snapshot's reader provides this method.
type readerSource interface {
Reader() (store.KVReader, error)
}
// ------------------------------------------------
// addRef takes one more reference on the store under its mutex and returns
// the store so the call can be used inline.
func (s *llStore) addRef() *llStore {
	s.m.Lock()
	s.refs++
	s.m.Unlock()

	return s
}
// decRef drops one reference. Once the count is zero or below, the
// wrapped KV store is closed; a Close failure is logged, not returned.
func (s *llStore) decRef() {
	s.m.Lock()
	s.refs--
	if stillReferenced := s.refs > 0; !stillReferenced {
		if err := s.kvStore.Close(); err != nil {
			s.logf("llStore kvStore.Close err: %v", err)
		}
	}
	s.m.Unlock()
}
// update() mutates this lower level store with latest data from the
// given higher level moss.Snapshot and returns a new moss.Snapshot
// that the higher level can use which represents this lower level
// store.
//
// When ssHigher is nil nothing is copied and only a new snapshot (backed
// by a fresh KVReader) is returned. Otherwise every entry of ssHigher,
// including deletions and excluding its own lower level, is replayed into
// a KV batch. When maxBatchSize is greater than zero the batch is executed
// and replaced every maxBatchSize entries; whatever remains is executed
// once iteration finishes.
//
// The returned snapshot starts with one reference and holds one reference
// on the llStore.
func (s *llStore) update(ssHigher moss.Snapshot, maxBatchSize uint64) (
ssLower moss.Snapshot, err error,
) {
if ssHigher != nil {
// NOTE: the ":=" below declares an err local to this if-block that
// shadows the named result. The deferred closures assign to that local,
// so errors from the deferred Close calls are only logged and never
// alter the value this function returns.
iter, err := ssHigher.StartIterator(nil, nil, moss.IteratorOptions{
IncludeDeletions: true,
SkipLowerLevel: true,
})
if err != nil {
return nil, err
}
defer func() {
err = iter.Close()
if err != nil {
s.logf("llStore iter.Close err: %v", err)
}
}()
kvWriter, err := s.kvStore.Writer()
if err != nil {
return nil, err
}
defer func() {
err = kvWriter.Close()
if err != nil {
s.logf("llStore kvWriter.Close err: %v", err)
}
}()
batch := kvWriter.NewBatch()
// The closure captures the batch variable, so it closes whichever batch
// is current when the function returns.
defer func() {
if batch != nil {
err = batch.Close()
if err != nil {
s.logf("llStore batch.Close err: %v", err)
}
}
}()
var readOptions moss.ReadOptions
// i counts the entries replayed so far.
i := uint64(0)
for {
// Progress log every million entries (this also fires at i == 0).
if i%1000000 == 0 {
s.logf("llStore.update, i: %d", i)
}
ex, key, val, err := iter.CurrentEx()
if err == moss.ErrIteratorDone {
break
}
if err != nil {
return nil, err
}
switch ex.Operation {
case moss.OperationSet:
batch.Set(key, val)
case moss.OperationDel:
batch.Delete(key)
case moss.OperationMerge:
// Merges are not forwarded as merges: the value is read back through
// ssHigher.Get and written as a plain Set, or as a Delete when nil.
val, err = ssHigher.Get(key, readOptions)
if err != nil {
return nil, err
}
if val != nil {
batch.Set(key, val)
} else {
batch.Delete(key)
}
default:
return nil, fmt.Errorf("moss store, update,"+
" unexpected operation, ex: %v", ex)
}
i++
err = iter.Next()
if err == moss.ErrIteratorDone {
break
}
if err != nil {
return nil, err
}
// Intermediate flush: execute and replace the batch every maxBatchSize
// entries so a single batch does not grow without bound.
if maxBatchSize > 0 && i%maxBatchSize == 0 {
err = kvWriter.ExecuteBatch(batch)
if err != nil {
return nil, err
}
err = batch.Close()
if err != nil {
return nil, err
}
batch = kvWriter.NewBatch()
}
}
// Final flush of whatever is left in the current batch.
if i > 0 {
s.logf("llStore.update, ExecuteBatch,"+
" path: %s, total: %d, start", s.llConfig["path"], i)
err = kvWriter.ExecuteBatch(batch)
if err != nil {
return nil, err
}
s.logf("llStore.update, ExecuteBatch,"+
" path: %s: total: %d, done", s.llConfig["path"], i)
}
}
// The reader is opened before the deferred Close calls above run, since
// those only execute when the function returns.
kvReader, err := s.kvStore.Reader()
if err != nil {
return nil, err
}
s.logf("llStore.update, new reader")
return &llSnapshot{
llStore: s.addRef(),
kvReader: kvReader,
refs: 1,
}, nil
}
// ------------------------------------------------
// addRef takes one more reference on the snapshot under its mutex and
// returns the snapshot so the call can be used inline.
func (llss *llSnapshot) addRef() *llSnapshot {
	llss.m.Lock()
	llss.refs++
	llss.m.Unlock()

	return llss
}
// decRef drops one reference. Once the count is zero or below, the
// snapshot closes its KVReader (logging any Close error) and releases its
// reference on the owning llStore. Both fields are set to nil afterwards.
func (llss *llSnapshot) decRef() {
	llss.m.Lock()
	llss.refs--
	if llss.refs <= 0 {
		if reader := llss.kvReader; reader != nil {
			if err := reader.Close(); err != nil {
				llss.llStore.logf("llSnapshot kvReader.Close err: %v", err)
			}
			llss.kvReader = nil
		}
		if owner := llss.llStore; owner != nil {
			owner.decRef()
			llss.llStore = nil
		}
	}
	llss.m.Unlock()
}
// ChildCollectionNames returns the names of all child snapshots, in map
// iteration order (unspecified). The error is always nil.
func (llss *llSnapshot) ChildCollectionNames() ([]string, error) {
	names := make([]string, 0, len(llss.childSnapshots))
	for name := range llss.childSnapshots {
		names = append(names, name)
	}
	return names, nil
}
// ChildCollectionSnapshot looks up the child snapshot registered under
// childCollectionName. A match is returned with one extra reference taken
// on it. An unknown name yields a nil snapshot and a nil error.
func (llss *llSnapshot) ChildCollectionSnapshot(childCollectionName string) (
	moss.Snapshot, error,
) {
	if child, found := llss.childSnapshots[childCollectionName]; found {
		child.addRef()
		return child, nil
	}
	return nil, nil
}
// Close gives up one reference on the snapshot; resources are released by
// decRef when the count reaches zero. It always returns nil.
func (llss *llSnapshot) Close() error {
	var noErr error
	llss.decRef()
	return noErr
}
// Get looks up key in the lower-level store. When the snapshot's reader
// implements readerSource, a dedicated reader is obtained for the lookup
// and closed afterwards (its Close error is ignored). Otherwise the
// snapshot's own reader is used. readOptions is not consulted.
func (llss *llSnapshot) Get(key []byte,
	readOptions moss.ReadOptions,
) ([]byte, error) {
	source, cloneable := llss.kvReader.(readerSource)
	if !cloneable {
		return llss.kvReader.Get(key)
	}

	clone, err := source.Reader()
	if err != nil {
		return nil, err
	}
	val, err := clone.Get(key)
	_ = clone.Close()
	return val, err
}
// StartIterator opens a range iterator over [startKeyInclusive,
// endKeyExclusive). When the snapshot's reader implements readerSource,
// the iterator runs on a dedicated reader that it owns and closes with
// itself. Otherwise it runs on the snapshot's own reader. The returned
// iterator holds one reference on the snapshot. iteratorOptions is not
// consulted.
func (llss *llSnapshot) StartIterator(
	startKeyInclusive, endKeyExclusive []byte,
	iteratorOptions moss.IteratorOptions,
) (moss.Iterator, error) {
	source, cloneable := llss.kvReader.(readerSource)
	if !cloneable {
		shared := llss.kvReader.RangeIterator(startKeyInclusive, endKeyExclusive)
		return &llIterator{llSnapshot: llss.addRef(), kvReader: nil, kvIterator: shared}, nil
	}

	clone, err := source.Reader()
	if err != nil {
		return nil, err
	}
	owned := clone.RangeIterator(startKeyInclusive, endKeyExclusive)
	return &llIterator{llSnapshot: llss.addRef(), kvReader: clone, kvIterator: owned}, nil
}
// ------------------------------------------------
// Close releases the wrapped KV iterator, the dedicated KV reader (when
// one was created for this iterator) and the reference held on the
// snapshot.
//
// It returns the iterator's Close error if there was one, otherwise the
// reader's. Each resource is set to nil once released and each release is
// guarded by a nil check, so calling Close again is a harmless no-op
// instead of a nil-pointer panic on the snapshot.
func (lli *llIterator) Close() error {
	var iterErr error
	if lli.kvIterator != nil {
		iterErr = lli.kvIterator.Close()
		lli.kvIterator = nil
	}

	var readerErr error
	if lli.kvReader != nil {
		readerErr = lli.kvReader.Close()
		lli.kvReader = nil
	}

	// Guarded like the two fields above so that repeated Close calls do
	// not dereference a nil snapshot or drop an extra reference.
	if lli.llSnapshot != nil {
		lli.llSnapshot.decRef()
		lli.llSnapshot = nil
	}

	if iterErr != nil {
		return iterErr
	}
	return readerErr
}
// Next advances the wrapped KV iterator. It always returns nil; reaching
// the end is reported by Current.
func (lli *llIterator) Next() error {
	it := lli.kvIterator
	it.Next()
	return nil
}
// SeekTo repositions the wrapped KV iterator at k. It always returns nil;
// an exhausted iterator is reported by Current.
func (lli *llIterator) SeekTo(k []byte) error {
	it := lli.kvIterator
	it.Seek(k)
	return nil
}
// Current returns the key and value at the iterator's position, or
// moss.ErrIteratorDone (with nil key and value) when the wrapped iterator
// reports that it is no longer valid.
func (lli *llIterator) Current() (key, val []byte, err error) {
	k, v, valid := lli.kvIterator.Current()
	if valid {
		return k, v, nil
	}
	return nil, nil, moss.ErrIteratorDone
}
// CurrentEx is not supported by the lower-level iterator: it always
// returns zero values together with moss.ErrUnimplemented.
func (lli *llIterator) CurrentEx() (
	entryEx moss.EntryEx, key, val []byte, err error,
) {
	// The named results are still at their zero values here.
	return entryEx, key, val, moss.ErrUnimplemented
}
// ------------------------------------------------
// InitMossStore opens (creating the directory if necessary) a moss.Store
// at config["path"] and wires it up as the lower level of a moss
// collection.
//
// config["path"] must be a non-empty string; an empty path yields
// os.ErrInvalid. When config["mossStoreOptions"] is present it is
// round-tripped through JSON onto a moss.StoreOptions whose
// CollectionOptions start out as options.
//
// It returns the initial lower-level snapshot, the update callback that
// persists a higher-level snapshot into the store, a KVStore wrapper
// around the moss.Store, and a function exposing the store's stats.
//
// Errors from creating the directory and from opening the store are
// wrapped with %w, so callers can inspect the cause with errors.Is and
// errors.As. The message text is the same as it would be with %v.
func InitMossStore(config map[string]interface{}, options moss.CollectionOptions) (
	moss.Snapshot, moss.LowerLevelUpdate, store.KVStore, statsFunc, error,
) {
	path, ok := config["path"].(string)
	if !ok {
		return nil, nil, nil, nil, fmt.Errorf("lower: missing path for InitMossStore config")
	}
	if path == "" {
		return nil, nil, nil, nil, os.ErrInvalid
	}

	err := os.MkdirAll(path, 0o700)
	if err != nil {
		return nil, nil, nil, nil, fmt.Errorf("lower: InitMossStore mkdir,"+
			" path: %s, err: %w", path, err)
	}

	storeOptions := moss.StoreOptions{
		CollectionOptions: options,
	}
	v, ok := config["mossStoreOptions"]
	if ok {
		b, err := util.MarshalJSON(v) // Convert from map[string]interface{}.
		if err != nil {
			return nil, nil, nil, nil, err
		}

		err = util.UnmarshalJSON(b, &storeOptions)
		if err != nil {
			return nil, nil, nil, nil, err
		}
	}

	s, err := moss.OpenStore(path, storeOptions)
	if err != nil {
		return nil, nil, nil, nil, fmt.Errorf("lower: moss.OpenStore,"+
			" path: %s, err: %w", path, err)
	}

	sw := &mossStoreWrapper{s: s}

	llUpdate := func(ssHigher moss.Snapshot) (moss.Snapshot, error) {
		ss, err := sw.s.Persist(ssHigher, moss.StorePersistOptions{
			CompactionConcern: moss.CompactionAllow,
		})
		if err != nil {
			return nil, err
		}

		sw.AddRef() // Ref-count to be owned by snapshot wrapper.

		return moss.NewSnapshotWrapper(ss, sw), nil
	}

	llSnapshot, err := llUpdate(nil)
	if err != nil {
		_ = s.Close()
		return nil, nil, nil, nil, err
	}

	// llStats reports the store's stats, or nil when they cannot be read.
	llStats := func() map[string]interface{} {
		stats, err := s.Stats()
		if err != nil {
			return nil
		}
		return stats
	}

	return llSnapshot, llUpdate, sw, llStats, nil
}
// mossStoreWrapper implements the bleve.index.store.KVStore
// interface, but only barely enough to allow it to be passed around
// as a lower-level store. Advanced apps will likely cast the
// mossStoreWrapper to access the Actual() method.
//
// The wrapper is reference counted: AddRef increments refs, Close
// decrements it and closes the moss.Store once the count is zero or below.
type mossStoreWrapper struct {
// m guards refs and s.
m sync.Mutex
// refs is the number of outstanding references.
refs int
// s is the wrapped store; set to nil after it has been closed.
s *moss.Store
}
// AddRef takes one more reference on the wrapper under its mutex.
func (w *mossStoreWrapper) AddRef() {
	w.m.Lock()
	w.refs += 1
	w.m.Unlock()
}
// Close drops one reference. When the count reaches zero or below, the
// wrapped moss.Store is closed and forgotten, and the store's Close error
// is returned.
//
// Once the store has been released, further Close calls return nil
// instead of dereferencing the cleared store pointer. Actual and
// histograms already tolerate a nil store in the same way.
func (w *mossStoreWrapper) Close() (err error) {
	w.m.Lock()
	w.refs--
	if w.refs <= 0 && w.s != nil {
		err = w.s.Close()
		w.s = nil
	}
	w.m.Unlock()
	return err
}
// Reader is not supported by the wrapper and always fails.
func (w *mossStoreWrapper) Reader() (store.KVReader, error) {
	err := fmt.Errorf("unexpected")
	return nil, err
}
// Writer is not supported by the wrapper and always fails.
func (w *mossStoreWrapper) Writer() (store.KVWriter, error) {
	err := fmt.Errorf("unexpected")
	return nil, err
}
// Actual returns the wrapped moss.Store, read under the wrapper's mutex.
// The result is nil once the store has been closed through Close.
func (w *mossStoreWrapper) Actual() *moss.Store {
	w.m.Lock()
	defer w.m.Unlock()
	return w.s
}
// histograms returns the string form of the wrapped store's histograms,
// or the empty string when the store has already been released.
func (w *mossStoreWrapper) histograms() string {
	text := ""
	w.m.Lock()
	if inner := w.s; inner != nil {
		text = inner.Histograms().String()
	}
	w.m.Unlock()
	return text
}
================================================
FILE: index/upsidedown/store/moss/lower_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"os"
"testing"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/upsidedown_store_api/test"
)
// openWithLower creates a temporary directory and opens a moss store
// configured with "mossStore" as its lower-level store at that path. It
// returns the directory (for later removal) together with the store.
//
// The test is failed immediately if the directory cannot be created or
// the store cannot be opened. In the latter case the directory is removed
// first so that a failing test does not leave it behind.
func openWithLower(t *testing.T, mo store.MergeOperator) (string, store.KVStore) {
	tmpDir, err := os.MkdirTemp("", "mossStore")
	if err != nil {
		t.Fatal(err)
	}

	config := map[string]interface{}{
		"path":                    tmpDir,
		"mossLowerLevelStoreName": "mossStore",
	}

	rv, err := New(mo, config)
	if err != nil {
		// Best effort: the test is already failing, so a cleanup error
		// would only obscure the real cause.
		_ = os.RemoveAll(tmpDir)
		t.Fatal(err)
	}

	return tmpDir, rv
}
// cleanupWithLower closes s and then removes tmpDir, failing the test on
// the first error. If Close fails the directory is not removed.
func cleanupWithLower(t *testing.T, s store.KVStore, tmpDir string) {
	if err := s.Close(); err != nil {
		t.Fatal(err)
	}
	if err := os.RemoveAll(tmpDir); err != nil {
		t.Fatal(err)
	}
}
// TestMossWithLowerKVCrud runs the shared CommonTestKVCrud suite against a
// store opened with a lower-level mossStore.
func TestMossWithLowerKVCrud(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestKVCrud(t, kv)
}
// TestMossWithLowerReaderIsolation runs the shared
// CommonTestReaderIsolation suite against a store opened with a
// lower-level mossStore.
func TestMossWithLowerReaderIsolation(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestReaderIsolation(t, kv)
}
// TestMossWithLowerReaderOwnsGetBytes runs the shared
// CommonTestReaderOwnsGetBytes suite against a store opened with a
// lower-level mossStore.
func TestMossWithLowerReaderOwnsGetBytes(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestReaderOwnsGetBytes(t, kv)
}
// TestMossWithLowerWriterOwnsBytes runs the shared
// CommonTestWriterOwnsBytes suite against a store opened with a
// lower-level mossStore.
func TestMossWithLowerWriterOwnsBytes(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestWriterOwnsBytes(t, kv)
}
// TestMossWithLowerPrefixIterator runs the shared
// CommonTestPrefixIterator suite against a store opened with a
// lower-level mossStore.
func TestMossWithLowerPrefixIterator(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestPrefixIterator(t, kv)
}
// TestMossWithLowerPrefixIteratorSeek runs the shared
// CommonTestPrefixIteratorSeek suite against a store opened with a
// lower-level mossStore.
func TestMossWithLowerPrefixIteratorSeek(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestPrefixIteratorSeek(t, kv)
}
// TestMossWithLowerRangeIterator runs the shared CommonTestRangeIterator
// suite against a store opened with a lower-level mossStore.
func TestMossWithLowerRangeIterator(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestRangeIterator(t, kv)
}
// TestMossWithLowerRangeIteratorSeek runs the shared
// CommonTestRangeIteratorSeek suite against a store opened with a
// lower-level mossStore.
func TestMossWithLowerRangeIteratorSeek(t *testing.T) {
	dir, kv := openWithLower(t, nil)
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestRangeIteratorSeek(t, kv)
}
// TestMossWithLowerMerge runs the shared CommonTestMerge suite against a
// store opened with a lower-level mossStore and a test.TestMergeCounter
// merge operator.
func TestMossWithLowerMerge(t *testing.T) {
	dir, kv := openWithLower(t, &test.TestMergeCounter{})
	defer cleanupWithLower(t, kv, dir)

	test.CommonTestMerge(t, kv)
}
================================================
FILE: index/upsidedown/store/moss/reader.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"github.com/couchbase/moss"
store "github.com/blevesearch/upsidedown_store_api"
)
// Reader serves reads from a single moss snapshot.
type Reader struct {
// store is the owning Store; it is handed on to the iterators this
// reader creates.
store *Store
// ss is the snapshot that all reads go through; released by Close.
ss moss.Snapshot
}
// Get returns a copy of the value stored under k in the snapshot, so the
// caller owns the returned bytes. A nil value from the snapshot is
// returned as nil with a nil error.
func (r *Reader) Get(k []byte) (v []byte, err error) {
	found, getErr := r.ss.Get(k, moss.ReadOptions{})
	if getErr != nil {
		return nil, getErr
	}
	if found == nil {
		return nil, nil
	}

	owned := make([]byte, len(found))
	copy(owned, found)
	return owned, nil
}
// MultiGet delegates to the store API's generic MultiGet helper, passing
// this reader and the requested keys.
func (r *Reader) MultiGet(keys [][]byte) ([][]byte, error) {
	vals, err := store.MultiGet(r, keys)
	return vals, err
}
// PrefixIterator returns an iterator over all keys beginning with k. It is
// a range iteration from k up to, but excluding, the key that
// incrementBytes derives from k. When that key is nil the range has no
// upper bound. A nil iterator is returned if the snapshot cannot start
// the iteration.
func (r *Reader) PrefixIterator(k []byte) store.KVIterator {
	upperBound := incrementBytes(k)
	return r.RangeIterator(k, upperBound)
}
// RangeIterator returns an iterator over the keys from start up to, but
// excluding, end. The returned iterator is already loaded with the first
// entry. A nil iterator is returned if the snapshot cannot start the
// iteration.
func (r *Reader) RangeIterator(start, end []byte) store.KVIterator {
	mossIter, err := r.ss.StartIterator(start, end, moss.IteratorOptions{})
	if err != nil {
		return nil
	}

	it := &Iterator{store: r.store, ss: r.ss, iter: mossIter, start: start, end: end}
	it.current()
	return it
}
// Close releases the reader's snapshot and returns the snapshot's Close
// error.
func (r *Reader) Close() error {
	err := r.ss.Close()
	return err
}
// incrementBytes returns a copy of in, treated as a big-endian number,
// incremented by one. The input is never modified. When the increment
// carries out of the most significant byte (every byte is 0xff, or in is
// empty), nil is returned.
func incrementBytes(in []byte) []byte {
	out := make([]byte, len(in))
	copy(out, in)

	idx := len(out) - 1
	for idx >= 0 {
		out[idx]++
		if out[idx] > 0 {
			// No carry out of this byte, so the result is complete.
			return out
		}
		idx--
	}
	return nil // every byte wrapped around
}
================================================
FILE: index/upsidedown/store/moss/stats.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"github.com/blevesearch/bleve/v2/util"
store "github.com/blevesearch/upsidedown_store_api"
)
// stats exposes statistics for a Store, both as a map (statsMap) and as
// JSON (MarshalJSON).
type stats struct {
// s is the Store whose collection and lower-level store are reported on.
s *Store
}
// statsMap gathers the store's statistics into a map:
//
//   - "moss": the collection's stats. If reading them fails, the map is
//     returned with only this entry.
//   - "kv": the lower-level store's StatsMap when it implements
//     store.KVStoreStats, otherwise the result of the store's llstats
//     function when one is set.
//   - "store_histograms": present when the lower-level store is a
//     mossStoreWrapper.
//   - "coll_histograms": the collection's histograms as a string.
func (s *stats) statsMap() map[string]interface{} {
	out := map[string]interface{}{}
	owner := s.s

	collStats, err := owner.ms.Stats()
	out["moss"] = collStats
	if err != nil {
		return out
	}

	// A type assertion on a nil interface simply fails, so no separate
	// nil check on llstore is needed.
	if kvStats, ok := owner.llstore.(store.KVStoreStats); ok {
		out["kv"] = kvStats.StatsMap()
	}
	if _, haveKV := out["kv"]; !haveKV && owner.llstats != nil {
		out["kv"] = owner.llstats()
	}

	if wrapper, ok := owner.llstore.(*mossStoreWrapper); ok {
		out["store_histograms"] = wrapper.histograms()
	}
	out["coll_histograms"] = owner.ms.Histograms().String()

	return out
}
// MarshalJSON encodes the map produced by statsMap as JSON.
func (s *stats) MarshalJSON() ([]byte, error) {
	return util.MarshalJSON(s.statsMap())
}
================================================
FILE: index/upsidedown/store/moss/store.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// Package moss provides a KVStore implementation based on the
// github.com/couchbase/moss library.
package moss
import (
"encoding/json"
"fmt"
"sync"
"github.com/couchbase/moss"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
store "github.com/blevesearch/upsidedown_store_api"
)
// RegistryCollectionOptions should be treated as read-only after
// process init()'ialization. New looks entries up by the name given in
// config["mossCollectionOptionsName"] and uses a copy of the matching
// options as its starting point.
var RegistryCollectionOptions = map[string]moss.CollectionOptions{}
// Name is the identifier under which this package's init registers New
// with the bleve KV store registry.
const Name = "moss"
// Store is a bleve KVStore backed by a moss collection, optionally layered
// over a lower-level store.
type Store struct {
// m is a mutex. NOTE(review): it is not referenced by any code visible here.
m sync.Mutex
// ms is the moss collection that all reads and writes go through.
ms moss.Collection
// mo is the merge operator supplied to New.
mo store.MergeOperator
llstore store.KVStore // May be nil.
llstats statsFunc // May be nil.
// s produces the values returned by Stats and StatsMap.
s *stats
// config is the configuration map passed to New; Close consults it.
config map[string]interface{}
}
// statsFunc returns a snapshot of a lower-level store's statistics as a
// map. stats.statsMap uses it as the fallback source for the "kv" entry.
type statsFunc func() map[string]interface{}
// New initializes a moss storage. The collection options are assembled in
// this order:
//
//  1. A copy of moss.DefaultCollectionOptions, or, when
//     config["mossCollectionOptionsName"] is set, a copy of the
//     RegistryCollectionOptions entry of that name.
//  2. MergeOperator is set to mo and DeferredSort to true.
//  3. When config["mossCollectionOptions"] is present it is round-tripped
//     through JSON and unmarshalled on top of the options.
//  4. Logging becomes a no-op unless a Log function is set and Debug is
//     greater than zero.
//
// When the options define neither LowerLevelInit nor LowerLevelUpdate and
// config["mossLowerLevelStoreName"] is a non-empty string, a lower-level
// store is initialized from config["mossLowerLevelStoreConfig"] (a
// map[string]interface{}) and config["mossLowerLevelMaxBatchSize"] (a
// float64, as produced by JSON decoding).
//
// The collection is started before New returns. JSON conversion errors
// are wrapped with %w so callers can inspect them with errors.Is and
// errors.As.
func New(mo store.MergeOperator, config map[string]interface{}) (
	store.KVStore, error) {
	options := moss.DefaultCollectionOptions // Copy.

	v, ok := config["mossCollectionOptionsName"]
	if ok {
		name, ok := v.(string)
		if !ok {
			return nil, fmt.Errorf("moss store,"+
				" could not parse config[mossCollectionOptionsName]: %v", v)
		}

		options, ok = RegistryCollectionOptions[name] // Copy.
		if !ok {
			return nil, fmt.Errorf("moss store,"+
				" could not find RegistryCollectionOptions, name: %s", name)
		}
	}

	options.MergeOperator = mo
	options.DeferredSort = true

	v, ok = config["mossCollectionOptions"]
	if ok {
		b, err := util.MarshalJSON(v) // Convert from map[string]interface{}.
		if err != nil {
			return nil, fmt.Errorf("moss store,"+
				" could not marshal config[mossCollectionOptions]: %v, err: %w", v, err)
		}

		err = util.UnmarshalJSON(b, &options)
		if err != nil {
			return nil, fmt.Errorf("moss store,"+
				" could not unmarshal config[mossCollectionOptions]: %v, err: %w", v, err)
		}
	}

	// --------------------------------------------------

	// Guarantee a callable Log so callers never have to nil-check it.
	if options.Log == nil || options.Debug <= 0 {
		options.Log = func(format string, a ...interface{}) {}
	}

	// --------------------------------------------------

	mossLowerLevelStoreName := ""
	v, ok = config["mossLowerLevelStoreName"]
	if ok {
		mossLowerLevelStoreName, ok = v.(string)
		if !ok {
			return nil, fmt.Errorf("moss store,"+
				" could not parse config[mossLowerLevelStoreName]: %v", v)
		}
	}

	var llStore store.KVStore
	var llStats statsFunc

	// Only set up a lower level when the options do not already carry one.
	if options.LowerLevelInit == nil &&
		options.LowerLevelUpdate == nil &&
		mossLowerLevelStoreName != "" {
		mossLowerLevelStoreConfig := map[string]interface{}{}

		v, ok := config["mossLowerLevelStoreConfig"]
		if ok {
			mossLowerLevelStoreConfig, ok = v.(map[string]interface{})
			if !ok {
				return nil, fmt.Errorf("moss store, initLowerLevelStore,"+
					" could not parse mossLowerLevelStoreConfig: %v", v)
			}
		}

		mossLowerLevelMaxBatchSize := uint64(0)
		v, ok = config["mossLowerLevelMaxBatchSize"]
		if ok {
			mossLowerLevelMaxBatchSizeF, ok := v.(float64)
			if !ok {
				return nil, fmt.Errorf("moss store,"+
					" could not parse config[mossLowerLevelMaxBatchSize]: %v", v)
			}

			mossLowerLevelMaxBatchSize = uint64(mossLowerLevelMaxBatchSizeF)
		}

		lowerLevelInit, lowerLevelUpdate, lowerLevelStore, lowerLevelStats, err :=
			initLowerLevelStore(config,
				mossLowerLevelStoreName,
				mossLowerLevelStoreConfig,
				mossLowerLevelMaxBatchSize,
				options)
		if err != nil {
			return nil, err
		}

		options.LowerLevelInit = lowerLevelInit
		options.LowerLevelUpdate = lowerLevelUpdate

		llStore = lowerLevelStore
		llStats = lowerLevelStats
	}

	// --------------------------------------------------

	ms, err := moss.NewCollection(options)
	if err != nil {
		return nil, err
	}
	err = ms.Start()
	if err != nil {
		return nil, err
	}

	rv := Store{
		ms:      ms,
		mo:      mo,
		llstore: llStore,
		llstats: llStats,
		config:  config,
	}
	rv.s = &stats{s: &rv}
	return &rv, nil
}
// Close shuts the store down and returns the error from closing the moss
// collection.
//
// When the config entry "mossAbortCloseEnabled" is the boolean true and the
// lower-level store is a *mossStoreWrapper with a non-nil underlying store,
// that underlying store is first closed with the Abort option; the error
// from that abort-close is deliberately discarded.
func (s *Store) Close() error {
	// A missing key or a non-bool value both yield false here.
	abortEnabled, _ := s.config["mossAbortCloseEnabled"].(bool)
	if abortEnabled {
		if wrapper, isWrapper := s.llstore.(*mossStoreWrapper); isWrapper {
			if actual := wrapper.Actual(); actual != nil {
				_ = actual.CloseEx(moss.StoreCloseExOptions{Abort: true})
			}
		}
	}
	return s.ms.Close()
}
// Reader takes a snapshot of the moss collection and returns a Reader wrapping
// it. An error from taking the snapshot is returned unchanged.
func (s *Store) Reader() (store.KVReader, error) {
	snapshot, err := s.ms.Snapshot()
	if err != nil {
		return nil, err
	}
	return &Reader{ss: snapshot}, nil
}
// Writer returns a new Writer bound to this store. It never fails.
func (s *Store) Writer() (store.KVWriter, error) {
	w := &Writer{s: s}
	return w, nil
}
// Logf forwards a formatted message to the Log function configured in the
// moss collection's options. It does nothing when no Log function is set.
func (s *Store) Logf(fmt string, args ...interface{}) {
	if logFn := s.ms.Options().Log; logFn != nil {
		logFn(fmt, args...)
	}
}
// Stats returns the store's stats object as a json.Marshaler.
func (s *Store) Stats() json.Marshaler {
	return s.s
}
// StatsMap returns the map produced by the stats object's statsMap method.
func (s *Store) StatsMap() map[string]interface{} {
	return s.s.statsMap()
}
// LowerLevelStore returns the lower-level KV store recorded on this Store.
// It may be nil when no lower-level store was set up at construction.
func (s *Store) LowerLevelStore() store.KVStore {
	return s.llstore
}
// Collection returns the underlying moss collection.
func (s *Store) Collection() moss.Collection {
	return s.ms
}
// init registers the moss KV store constructor under Name and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterKVStore(Name, New); err != nil {
		panic(err)
	}
}
================================================
FILE: index/upsidedown/store/moss/store_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"testing"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/blevesearch/upsidedown_store_api/test"
)
// open creates a moss store with the given merge operator and a nil config,
// failing the test immediately if construction returns an error.
func open(t *testing.T, mo store.MergeOperator) store.KVStore {
	// Mark as a helper so a failure is attributed to the calling test.
	t.Helper()
	rv, err := New(mo, nil)
	if err != nil {
		t.Fatal(err)
	}
	return rv
}
// cleanup closes the store and fails the test if Close returns an error.
func cleanup(t *testing.T, s store.KVStore) {
	// Mark as a helper so a failure is attributed to the calling test.
	t.Helper()
	if err := s.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestMossKVCrud runs test.CommonTestKVCrud against a moss store opened
// without a merge operator.
func TestMossKVCrud(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestKVCrud(t, kv)
}
// TestMossReaderIsolation runs test.CommonTestReaderIsolation against a moss
// store opened without a merge operator.
func TestMossReaderIsolation(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestReaderIsolation(t, kv)
}
// TestMossReaderOwnsGetBytes runs test.CommonTestReaderOwnsGetBytes against a
// moss store opened without a merge operator.
func TestMossReaderOwnsGetBytes(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestReaderOwnsGetBytes(t, kv)
}
// TestMossWriterOwnsBytes runs test.CommonTestWriterOwnsBytes against a moss
// store opened without a merge operator.
func TestMossWriterOwnsBytes(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestWriterOwnsBytes(t, kv)
}
// TestMossPrefixIterator runs test.CommonTestPrefixIterator against a moss
// store opened without a merge operator.
func TestMossPrefixIterator(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestPrefixIterator(t, kv)
}
// TestMossPrefixIteratorSeek runs test.CommonTestPrefixIteratorSeek against a
// moss store opened without a merge operator.
func TestMossPrefixIteratorSeek(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestPrefixIteratorSeek(t, kv)
}
// TestMossRangeIterator runs test.CommonTestRangeIterator against a moss
// store opened without a merge operator.
func TestMossRangeIterator(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestRangeIterator(t, kv)
}
// TestMossRangeIteratorSeek runs test.CommonTestRangeIteratorSeek against a
// moss store opened without a merge operator.
func TestMossRangeIteratorSeek(t *testing.T) {
	kv := open(t, nil)
	defer cleanup(t, kv)

	test.CommonTestRangeIteratorSeek(t, kv)
}
// TestMossMerge runs test.CommonTestMerge against a moss store opened with a
// test.TestMergeCounter merge operator.
func TestMossMerge(t *testing.T) {
	kv := open(t, &test.TestMergeCounter{})
	defer cleanup(t, kv)

	test.CommonTestMerge(t, kv)
}
================================================
FILE: index/upsidedown/store/moss/writer.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package moss
import (
"fmt"
store "github.com/blevesearch/upsidedown_store_api"
"github.com/couchbase/moss"
)
// Writer creates and executes batches against a moss-backed Store.
type Writer struct {
	// s is the owning store; Close sets it to nil.
	s *Store
}
// NewBatch asks the moss collection for a batch with no pre-sized operations
// or buffer and wraps it in a Batch that emulates merges with the store's
// merge operator. It returns nil if the collection cannot create the batch.
func (w *Writer) NewBatch() store.KVBatch {
	mossBatch, err := w.s.ms.NewBatch(0, 0)
	if err != nil {
		return nil
	}

	rv := &Batch{
		store: w.s,
		merge: store.NewEmulatedMerge(w.s.mo),
		batch: mossBatch,
	}
	return rv
}
// NewBatchEx creates a moss batch sized for the operation counts and total
// bytes in options, allocates a buffer of options.TotalBytes from it, and
// returns that buffer together with a Batch wrapping the moss batch.
//
// The returned Batch records the same buffer (with bufUsed starting at 0).
// If creating the batch or allocating the buffer fails, the error is
// returned and no batch is handed to the caller.
func (w *Writer) NewBatchEx(options store.KVBatchOptions) (
	[]byte, store.KVBatch, error) {
	numOps := options.NumSets + options.NumDeletes + options.NumMerges

	b, err := w.s.ms.NewBatch(numOps, options.TotalBytes)
	if err != nil {
		return nil, nil, err
	}

	buf, err := b.Alloc(options.TotalBytes)
	if err != nil {
		// The batch never reaches the caller on this path, so release it
		// here. The allocation failure is the error worth reporting, so
		// the Close error is intentionally dropped.
		_ = b.Close()
		return nil, nil, err
	}

	return buf, &Batch{
		store:   w.s,
		merge:   store.NewEmulatedMerge(w.s.mo),
		batch:   b,
		buf:     buf,
		bufUsed: 0,
	}, nil
}
// ExecuteBatch flushes every emulated merge recorded on b into the underlying
// moss batch and then executes that batch on the collection.
//
// b must be a *Batch, otherwise an error is returned. When the batch carries
// a pre-allocated buffer, each merge key and operand is copied into the next
// free region of that buffer and added with AllocMerge; otherwise the merge is
// added with Merge. The first error from adding a merge aborts the call.
func (w *Writer) ExecuteBatch(b store.KVBatch) (err error) {
	mb, isMossBatch := b.(*Batch)
	if !isMossBatch {
		return fmt.Errorf("wrong type of batch")
	}

	for key, operands := range mb.merge.Merges {
		for _, operand := range operands {
			if mb.buf == nil {
				err = mb.batch.Merge([]byte(key), operand)
			} else {
				// Carve [key][operand] out of the buffer at bufUsed.
				keyEnd := mb.bufUsed + len(key)
				valEnd := keyEnd + len(operand)
				keyDst := mb.buf[mb.bufUsed:keyEnd]
				valDst := mb.buf[keyEnd:valEnd]
				copy(keyDst, key)
				copy(valDst, operand)
				mb.bufUsed = valEnd
				err = mb.batch.AllocMerge(keyDst, valDst)
			}
			if err != nil {
				return err
			}
		}
	}

	return w.s.ms.ExecuteBatch(mb.batch, moss.WriteOptions{})
}
// Close drops the writer's reference to its store. It always returns nil.
func (w *Writer) Close() error {
	w.s = nil
	return nil
}
================================================
FILE: index/upsidedown/store/null/null.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package null
import (
"github.com/blevesearch/bleve/v2/registry"
store "github.com/blevesearch/upsidedown_store_api"
)
// Name is the name under which init registers this KV store.
const Name = "null"
// Store is a KV store with no state: its readers return nothing and its
// writers discard everything.
type Store struct{}

// New returns a new null Store. Both arguments are ignored and the returned
// error is always nil.
func New(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error) {
	return new(Store), nil
}

// Close is a no-op and always returns nil.
func (s *Store) Close() error {
	return nil
}

// Reader returns a reader that never yields any data.
func (s *Store) Reader() (store.KVReader, error) {
	return new(reader), nil
}

// Writer returns a writer that discards everything written to it.
func (s *Store) Writer() (store.KVWriter, error) {
	return new(writer), nil
}
// reader is a KV reader over an always-empty store.
type reader struct{}

// Get always returns a nil value and a nil error.
func (r *reader) Get(key []byte) ([]byte, error) {
	return nil, nil
}

// MultiGet returns one nil value per requested key.
func (r *reader) MultiGet(keys [][]byte) ([][]byte, error) {
	vals := make([][]byte, len(keys))
	return vals, nil
}

// PrefixIterator returns an iterator that is never valid.
func (r *reader) PrefixIterator(prefix []byte) store.KVIterator {
	return new(iterator)
}

// RangeIterator returns an iterator that is never valid.
func (r *reader) RangeIterator(start, end []byte) store.KVIterator {
	return new(iterator)
}

// Close is a no-op and always returns nil.
func (r *reader) Close() error {
	return nil
}
// iterator is a KV iterator over nothing: positioning calls are no-ops and it
// is never valid.
type iterator struct{}

// SeekFirst is a no-op.
func (it *iterator) SeekFirst() {}

// Seek is a no-op.
func (it *iterator) Seek(k []byte) {}

// Next is a no-op.
func (it *iterator) Next() {}

// Current always reports a nil key, a nil value and valid == false.
func (it *iterator) Current() ([]byte, []byte, bool) {
	return nil, nil, false
}

// Key always returns nil.
func (it *iterator) Key() []byte {
	return nil
}

// Value always returns nil.
func (it *iterator) Value() []byte {
	return nil
}

// Valid always returns false.
func (it *iterator) Valid() bool {
	return false
}

// Close is a no-op and always returns nil.
func (it *iterator) Close() error {
	return nil
}
// batch is a KV batch that discards every operation recorded on it.
type batch struct{}

// Set is a no-op.
func (b *batch) Set(key, val []byte) {}

// Delete is a no-op.
func (b *batch) Delete(key []byte) {}

// Merge is a no-op.
func (b *batch) Merge(key, val []byte) {}

// Reset is a no-op.
func (b *batch) Reset() {}

// Close is a no-op and always returns nil.
func (b *batch) Close() error {
	return nil
}
// writer is a KV writer whose batches are discarded on execution.
type writer struct{}

// NewBatch returns a batch that ignores every operation.
func (w *writer) NewBatch() store.KVBatch {
	return new(batch)
}

// NewBatchEx returns a fresh buffer of options.TotalBytes bytes together with
// a no-op batch. The returned error is always nil.
func (w *writer) NewBatchEx(options store.KVBatchOptions) ([]byte, store.KVBatch, error) {
	buf := make([]byte, options.TotalBytes)
	b := w.NewBatch()
	return buf, b, nil
}

// ExecuteBatch discards the batch and always returns nil.
func (w *writer) ExecuteBatch(store.KVBatch) error {
	return nil
}

// Close is a no-op and always returns nil.
func (w *writer) Close() error {
	return nil
}
// init registers the null KV store constructor under Name and panics if the
// registry rejects the registration.
func init() {
	if err := registry.RegisterKVStore(Name, New); err != nil {
		panic(err)
	}
}
================================================
FILE: index/upsidedown/store/null/null_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package null
import (
"testing"
store "github.com/blevesearch/upsidedown_store_api"
)
// TestStore constructs a null store and runs NullTestKVStore against it.
func TestStore(t *testing.T) {
	kv, err := New(nil, nil)
	if err != nil {
		t.Fatal(err)
	}

	NullTestKVStore(t, kv)
}
// NullTestKVStore has very different expectations
// compared to CommonTestKVStore: it writes a batch of keys and then checks
// that a range iterator over the store is still invalid with a nil key and
// value. It closes the writer, the reader, the iterator and the store itself.
func NullTestKVStore(t *testing.T, s store.KVStore) {
	writer, err := s.Writer()
	if err != nil {
		// Fatal, not Error: continuing would use an unusable writer.
		t.Fatal(err)
	}

	batch := writer.NewBatch()
	batch.Set([]byte("b"), []byte("val-b"))
	batch.Set([]byte("c"), []byte("val-c"))
	batch.Set([]byte("d"), []byte("val-d"))
	batch.Set([]byte("e"), []byte("val-e"))
	batch.Set([]byte("f"), []byte("val-f"))
	batch.Set([]byte("g"), []byte("val-g"))
	batch.Set([]byte("h"), []byte("val-h"))
	batch.Set([]byte("i"), []byte("val-i"))
	batch.Set([]byte("j"), []byte("val-j"))

	err = writer.ExecuteBatch(batch)
	if err != nil {
		t.Fatal(err)
	}
	err = writer.Close()
	if err != nil {
		t.Fatal(err)
	}

	reader, err := s.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use an unusable reader.
		t.Fatal(err)
	}
	defer func() {
		err := reader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// Nothing written above may be visible through the null store.
	it := reader.RangeIterator([]byte("b"), nil)
	key, val, valid := it.Current()
	if valid {
		t.Fatalf("valid true, expected false")
	}
	if key != nil {
		t.Fatalf("expected key nil, got %s", key)
	}
	if val != nil {
		t.Fatalf("expected value nil, got %s", val)
	}

	err = it.Close()
	if err != nil {
		t.Fatal(err)
	}

	err = s.Close()
	if err != nil {
		t.Fatal(err)
	}
}
================================================
FILE: index/upsidedown/upsidedown.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:generate protoc --gofast_out=. upsidedown.proto
package upsidedown
import (
"encoding/binary"
"encoding/json"
"fmt"
"math"
"sync"
"sync/atomic"
"time"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/registry"
index "github.com/blevesearch/bleve_index_api"
store "github.com/blevesearch/upsidedown_store_api"
"google.golang.org/protobuf/proto"
)
// Name identifies the upside_down index implementation.
// NOTE(review): presumably the name it is registered under; the registration
// is not shown in this part of the file.
const Name = "upside_down"
// RowBufferSize should ideally be sized to be the smallest
// size that can contain an index row key and its corresponding
// value. It is not a limit; if need be, a larger buffer is
// allocated, but performance will be more optimal if *most*
// rows fit this size.
const RowBufferSize = 4 * 1024
// VersionKey is the key Open reads to decide whether the store already holds
// an index.
var VersionKey = []byte{'v'}
// Version is the index version assigned to new UpsideDownCouch instances and
// the only stored version loadSchema accepts.
const Version uint8 = 7
// IncompatibleVersion is returned by loadSchema when the stored version row
// does not match Version.
var IncompatibleVersion = fmt.Errorf("incompatible version, %d is supported", Version)
// ErrorUnknownStorageType is returned by Open when no KV store constructor is
// registered under the configured store name.
var ErrorUnknownStorageType = fmt.Errorf("unknown storage type")
// UpsideDownCouch is an index implementation that keeps all of its rows in a
// store.KVStore.
type UpsideDownCouch struct {
	// version is written as the version row when a new index is initialized.
	version uint8
	// NOTE(review): path is not referenced in the visible part of the file.
	path string
	// storeName selects the KV store constructor from the registry in Open.
	storeName string
	// storeConfig is passed to the KV store constructor in Open.
	storeConfig map[string]interface{}
	// store is the underlying KV store, assigned by Open.
	store store.KVStore
	// fieldCache holds the field name/index pairs loaded by loadSchema.
	fieldCache *FieldCache
	// analysisQueue receives the document analysis work for Update and Batch.
	analysisQueue *index.AnalysisQueue
	// stats holds counters that are updated with sync/atomic.
	stats *indexStat
	m sync.RWMutex
	// fields protected by m
	docCount uint64
	// writeMutex is held for the duration of Open, Update, Delete, Batch and
	// SetInternal.
	writeMutex sync.Mutex
}
// docBackIndexRow pairs one batch operation with the back index row looked up
// for its document ID. Batch sends these over a channel from its back-index
// lookup goroutine.
type docBackIndexRow struct {
	docID        string
	doc          index.Document // If deletion, doc will be nil.
	backIndexRow *BackIndexRow
}
// NewUpsideDownCouch builds an unopened UpsideDownCouch index that will use
// the named KV store with the given configuration, and the given queue for
// document analysis. The returned error is always nil.
func NewUpsideDownCouch(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error) {
	udc := new(UpsideDownCouch)
	udc.version = Version
	udc.fieldCache = NewFieldCache()
	udc.storeName = storeName
	udc.storeConfig = storeConfig
	udc.analysisQueue = analysisQueue
	// The stats object keeps a back-reference to the index it reports on.
	udc.stats = &indexStat{i: udc}
	return udc, nil
}
// init writes the version row for udc.version through kvwriter. The row is
// submitted as an update batch of its own.
func (udc *UpsideDownCouch) init(kvwriter store.KVWriter) (err error) {
	versionRow := NewVersionRow(udc.version)
	updates := [][]UpsideDownCouchRow{{versionRow}}
	return udc.batchRows(kvwriter, nil, updates, nil)
}
// loadSchema loads every row under the 'f' prefix into the field cache and
// then checks the stored version row.
//
// It returns the first error from decoding a field row, reading or decoding
// the version row, or IncompatibleVersion when the stored version differs
// from Version. If none of those fail, an error from closing the iterator is
// returned instead.
func (udc *UpsideDownCouch) loadSchema(kvreader store.KVReader) (err error) {
	it := kvreader.PrefixIterator([]byte{'f'})
	defer func() {
		cerr := it.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	for key, val, valid := it.Current(); valid; key, val, valid = it.Current() {
		var fieldRow *FieldRow
		fieldRow, err = NewFieldRowKV(key, val)
		if err != nil {
			return err
		}
		udc.fieldCache.AddExisting(fieldRow.name, fieldRow.index)
		it.Next()
	}

	var versionVal []byte
	versionVal, err = kvreader.Get([]byte{'v'})
	if err != nil {
		return err
	}

	var versionRow *VersionRow
	versionRow, err = NewVersionRowKV([]byte{'v'}, versionVal)
	if err != nil {
		return err
	}

	if versionRow.version != Version {
		err = IncompatibleVersion
	}
	return err
}
// rowBuffer wraps a scratch byte slice so it can be recycled through
// rowBufferPool.
type rowBuffer struct {
	buf []byte
}
// rowBufferPool recycles rowBuffer values handed out by GetRowBuffer and
// returned through PutRowBuffer.
var rowBufferPool sync.Pool
// GetRowBuffer returns a row buffer from the pool, or a newly allocated one
// of RowBufferSize bytes when the pool has none to offer.
func GetRowBuffer() *rowBuffer {
	if pooled, ok := rowBufferPool.Get().(*rowBuffer); ok {
		return pooled
	}
	return &rowBuffer{buf: make([]byte, RowBufferSize)}
}
// PutRowBuffer returns rb to the pool so a later GetRowBuffer can reuse it.
func PutRowBuffer(rb *rowBuffer) {
	rowBufferPool.Put(rb)
}
// batchRows writes the given add, update and delete rows through writer as a
// single batch.
//
// Every added TermFrequencyRow contributes +1 and every deleted one -1 to a
// per-dictionary-key delta; each affected dictionary key is then emitted as
// one Merge carrying that delta. The function first sizes one buffer for all
// keys and values, obtains it from writer.NewBatchEx, serializes each row
// into consecutive regions of it, and finally executes the batch.
//
// It returns the first error from serializing a row, creating the batch, or
// executing it.
func (udc *UpsideDownCouch) batchRows(writer store.KVWriter, addRowsAll [][]UpsideDownCouchRow, updateRowsAll [][]UpsideDownCouchRow, deleteRowsAll [][]UpsideDownCouchRow) (err error) {
	// net change in count per dictionary row key
	dictionaryDeltas := make(map[string]int64)
	// count up bytes needed for buffering.
	addNum := 0
	addKeyBytes := 0
	addValBytes := 0
	updateNum := 0
	updateKeyBytes := 0
	updateValBytes := 0
	deleteNum := 0
	deleteKeyBytes := 0
	// scratch buffer used only to render dictionary row keys
	rowBuf := GetRowBuffer()
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// grow the scratch buffer if this key does not fit
				if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
					rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] += 1
			}
			addKeyBytes += row.KeySize()
			addValBytes += row.ValueSize()
		}
		addNum += len(addRows)
	}
	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			updateKeyBytes += row.KeySize()
			updateValBytes += row.ValueSize()
		}
		updateNum += len(updateRows)
	}
	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			tfr, ok := row.(*TermFrequencyRow)
			if ok {
				// need to decrement counter
				if tfr.DictionaryRowKeySize() > len(rowBuf.buf) {
					rowBuf.buf = make([]byte, tfr.DictionaryRowKeySize())
				}
				dictKeySize, err := tfr.DictionaryRowKeyTo(rowBuf.buf)
				if err != nil {
					return err
				}
				dictionaryDeltas[string(rowBuf.buf[:dictKeySize])] -= 1
			}
			// deletes carry a key only, so no value bytes are counted
			deleteKeyBytes += row.KeySize()
		}
		deleteNum += len(deleteRows)
	}
	PutRowBuffer(rowBuf)
	// one merge per distinct dictionary key, each with a fixed-size value slot
	mergeNum := len(dictionaryDeltas)
	mergeKeyBytes := 0
	mergeValBytes := mergeNum * DictionaryRowMaxValueSize
	for dictRowKey := range dictionaryDeltas {
		mergeKeyBytes += len(dictRowKey)
	}
	// prepare batch
	// NOTE(review): merge bytes are counted twice; presumably the extra half
	// leaves room for a store to copy merge keys/operands into the same
	// buffer when executing the batch — confirm against the store in use.
	totBytes := addKeyBytes + addValBytes +
		updateKeyBytes + updateValBytes +
		deleteKeyBytes +
		2*(mergeKeyBytes+mergeValBytes)
	buf, wb, err := writer.NewBatchEx(store.KVBatchOptions{
		TotalBytes: totBytes,
		NumSets:    addNum + updateNum,
		NumDeletes: deleteNum,
		NumMerges:  mergeNum,
	})
	if err != nil {
		return err
	}
	defer func() {
		_ = wb.Close()
	}()
	// fill the batch
	// Each row is serialized at the front of buf, and buf is then advanced
	// past the bytes just written.
	for _, addRows := range addRowsAll {
		for _, row := range addRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}
	for _, updateRows := range updateRowsAll {
		for _, row := range updateRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			valSize, err := row.ValueTo(buf[keySize:])
			if err != nil {
				return err
			}
			wb.Set(buf[:keySize], buf[keySize:keySize+valSize])
			buf = buf[keySize+valSize:]
		}
	}
	for _, deleteRows := range deleteRowsAll {
		for _, row := range deleteRows {
			keySize, err := row.KeyTo(buf)
			if err != nil {
				return err
			}
			wb.Delete(buf[:keySize])
			buf = buf[keySize:]
		}
	}
	for dictRowKey, delta := range dictionaryDeltas {
		dictRowKeyLen := copy(buf, dictRowKey)
		// The delta is written as a little-endian uint64 at the start of a
		// DictionaryRowMaxValueSize-byte slot; the whole slot is the operand.
		binary.LittleEndian.PutUint64(buf[dictRowKeyLen:], uint64(delta))
		wb.Merge(buf[:dictRowKeyLen], buf[dictRowKeyLen:dictRowKeyLen+DictionaryRowMaxValueSize])
		buf = buf[dictRowKeyLen+DictionaryRowMaxValueSize:]
	}
	// write out the batch
	return writer.ExecuteBatch(wb)
}
// Open opens the configured KV store and prepares the index for use.
//
// If the store already contains a version row, the schema is loaded and the
// document count is computed from the existing rows. Otherwise the store is
// treated as a new index and the version row is written.
//
// It returns ErrorUnknownStorageType when no constructor is registered for
// the configured store name, and otherwise the first error encountered while
// opening, reading, or initializing the store.
func (udc *UpsideDownCouch) Open() (err error) {
	// acquire the write mutex for the duration of Open()
	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	// open the kv store
	storeConstructor := registry.KVStoreConstructorByName(udc.storeName)
	if storeConstructor == nil {
		err = ErrorUnknownStorageType
		return
	}

	// now open the store
	udc.store, err = storeConstructor(&mergeOperator, udc.storeConfig)
	if err != nil {
		return
	}

	// start a reader to look at the index
	var kvreader store.KVReader
	kvreader, err = udc.store.Reader()
	if err != nil {
		return
	}

	var value []byte
	value, err = kvreader.Get(VersionKey)
	if err != nil {
		_ = kvreader.Close()
		return
	}

	if value != nil {
		err = udc.loadSchema(kvreader)
		if err != nil {
			_ = kvreader.Close()
			return
		}

		// set doc count
		udc.m.Lock()
		udc.docCount, err = udc.countDocs(kvreader)
		udc.m.Unlock()
		if err != nil {
			// Report the counting failure rather than letting the result
			// of Close below overwrite it.
			_ = kvreader.Close()
			return
		}

		err = kvreader.Close()
	} else {
		// new index, close the reader and open writer to init
		err = kvreader.Close()
		if err != nil {
			return
		}

		var kvwriter store.KVWriter
		kvwriter, err = udc.store.Writer()
		if err != nil {
			return
		}
		defer func() {
			if cerr := kvwriter.Close(); err == nil && cerr != nil {
				err = cerr
			}
		}()

		// init the index
		err = udc.init(kvwriter)
	}

	return
}
// countDocs counts the rows under the 'b' prefix visible through kvreader.
// The only error it can return is one from closing the iterator.
func (udc *UpsideDownCouch) countDocs(kvreader store.KVReader) (count uint64, err error) {
	it := kvreader.PrefixIterator([]byte{'b'})
	defer func() {
		cerr := it.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	for _, _, valid := it.Current(); valid; _, _, valid = it.Current() {
		count++
		it.Next()
	}
	return count, err
}
// rowCount counts every row in the store using a reader opened for the
// duration of the call. It returns an error if the reader cannot be opened,
// or if closing the iterator or the reader fails.
func (udc *UpsideDownCouch) rowCount() (count uint64, err error) {
	var kvreader store.KVReader
	kvreader, err = udc.store.Reader()
	if err != nil {
		return count, err
	}
	defer func() {
		cerr := kvreader.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	// nil bounds: iterate the whole key space
	it := kvreader.RangeIterator(nil, nil)
	defer func() {
		cerr := it.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	for _, _, valid := it.Current(); valid; _, _, valid = it.Current() {
		count++
		it.Next()
	}
	return count, err
}
// Close closes the underlying KV store and returns its error.
func (udc *UpsideDownCouch) Close() error {
	return udc.store.Close()
}
// Update analyzes doc and writes it to the index, replacing any rows
// previously stored for the same document ID.
//
// Analysis runs on the analysis queue before the write mutex is taken. The
// existing back index row for the document is then looked up and handed,
// with the analysis result, to UpdateWithAnalysis.
func (udc *UpsideDownCouch) Update(doc index.Document) (err error) {
	// do analysis before acquiring write lock
	started := time.Now()
	analyzed := make(chan *AnalysisResult)
	udc.analysisQueue.Queue(func() {
		analyzed <- udc.analyze(doc)
	})
	result := <-analyzed
	close(analyzed)
	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(started)))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	// open a reader for the back index lookup
	var kvreader store.KVReader
	if kvreader, err = udc.store.Reader(); err != nil {
		return err
	}

	// look up the back index row for the doc id, if it exists
	var backIndexRow *BackIndexRow
	backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(doc.ID()))
	if err != nil {
		_ = kvreader.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return err
	}

	if err = kvreader.Close(); err != nil {
		return err
	}

	return udc.UpdateWithAnalysis(doc, result, backIndexRow)
}
// UpdateWithAnalysis writes an already analyzed document to the index.
//
// The rows in result are compared with backIndexRow (the document's existing
// back index row, nil for a new document) to decide which rows to add,
// update and delete, and the outcome is written as one batch. On success the
// document count is incremented for a new document and the update and
// plain-text-byte counters are advanced; on failure the error counter is
// incremented.
func (udc *UpsideDownCouch) UpdateWithAnalysis(doc index.Document,
	result *AnalysisResult, backIndexRow *BackIndexRow) (err error) {
	// start a writer for this update
	indexStart := time.Now()
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return err
	}
	defer func() {
		cerr := kvwriter.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	// wrap each non-empty row list so batchRows receives slices of slices
	addRows, updateRows, deleteRows := udc.mergeOldAndNew(backIndexRow, result.Rows)
	var addRowsAll, updateRowsAll, deleteRowsAll [][]UpsideDownCouchRow
	if len(addRows) > 0 {
		addRowsAll = append(addRowsAll, addRows)
	}
	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err == nil && backIndexRow == nil {
		// no previous back index row: this is a new document
		udc.m.Lock()
		udc.docCount++
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err != nil {
		atomic.AddUint64(&udc.stats.errors, 1)
		return err
	}
	atomic.AddUint64(&udc.stats.updates, 1)
	atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, doc.NumPlainTextBytes())
	return nil
}
// mergeOldAndNew splits the freshly analyzed rows of a document into rows to
// add, rows to update and rows to delete, relative to backIndexRow.
//
// With a nil backIndexRow every row is an add. Otherwise a term frequency or
// stored row whose key already appears in the back index becomes an update
// and any other such row an add; rows of every other type are always
// updates. Back index keys that no new row matched are turned back into rows
// and returned as deletes (keys that fail to decode are skipped).
func (udc *UpsideDownCouch) mergeOldAndNew(backIndexRow *BackIndexRow, rows []IndexRow) (addRows []UpsideDownCouchRow, updateRows []UpsideDownCouchRow, deleteRows []UpsideDownCouchRow) {
	if backIndexRow == nil {
		addRows = make([]UpsideDownCouchRow, len(rows))
		for i := range rows {
			addRows[i] = rows[i]
		}
		return addRows, nil, nil
	}

	addRows = make([]UpsideDownCouchRow, 0, len(rows))
	updateRows = make([]UpsideDownCouchRow, 0, len(rows))
	deleteRows = make([]UpsideDownCouchRow, 0, len(rows))

	// The key sets stay nil when the back index has no keys of that kind.
	var existingTermKeys map[string]struct{}
	if termKeys := backIndexRow.AllTermKeys(); len(termKeys) > 0 {
		existingTermKeys = make(map[string]struct{}, len(termKeys))
		for _, key := range termKeys {
			existingTermKeys[string(key)] = struct{}{}
		}
	}

	var existingStoredKeys map[string]struct{}
	if storedKeys := backIndexRow.AllStoredKeys(); len(storedKeys) > 0 {
		existingStoredKeys = make(map[string]struct{}, len(storedKeys))
		for _, key := range storedKeys {
			existingStoredKeys[string(key)] = struct{}{}
		}
	}

	keyBuf := GetRowBuffer()

	// claimExisting reports whether row's key is in existing and, if so,
	// removes it so that it is not scheduled for deletion below.
	claimExisting := func(existing map[string]struct{}, row UpsideDownCouchRow) bool {
		if existing == nil {
			return false
		}
		if row.KeySize() > len(keyBuf.buf) {
			keyBuf.buf = make([]byte, row.KeySize())
		}
		keySize, _ := row.KeyTo(keyBuf.buf)
		if _, found := existing[string(keyBuf.buf[:keySize])]; !found {
			return false
		}
		delete(existing, string(keyBuf.buf[:keySize]))
		return true
	}

	for _, row := range rows {
		switch typed := row.(type) {
		case *TermFrequencyRow:
			if claimExisting(existingTermKeys, typed) {
				updateRows = append(updateRows, typed)
			} else {
				addRows = append(addRows, typed)
			}
		case *StoredRow:
			if claimExisting(existingStoredKeys, typed) {
				updateRows = append(updateRows, typed)
			} else {
				addRows = append(addRows, typed)
			}
		default:
			updateRows = append(updateRows, row)
		}
	}
	PutRowBuffer(keyBuf)

	// any of the existing rows that weren't updated need to be deleted
	for leftoverKey := range existingTermKeys {
		if termFreqRow, err := NewTermFrequencyRowK([]byte(leftoverKey)); err == nil {
			deleteRows = append(deleteRows, termFreqRow)
		}
	}

	// any of the existing stored fields that weren't updated need to be deleted
	for leftoverKey := range existingStoredKeys {
		if storedRow, err := NewStoredRowK([]byte(leftoverKey)); err == nil {
			deleteRows = append(deleteRows, storedRow)
		}
	}

	return addRows, updateRows, deleteRows
}
// storeField appends a stored row for field to rows and a matching back index
// store entry to backIndexStoredEntries, returning both extended slices.
func (udc *UpsideDownCouch) storeField(docID []byte, field index.Field, fieldIndex uint16, rows []IndexRow, backIndexStoredEntries []*BackIndexStoreEntry) ([]IndexRow, []*BackIndexStoreEntry) {
	encodedType := field.EncodedFieldType()
	rows = append(rows, NewStoredRow(docID, fieldIndex, field.ArrayPositions(), encodedType, field.Value()))

	// record the back index entry
	entry := &BackIndexStoreEntry{
		Field:          proto.Uint32(uint32(fieldIndex)),
		ArrayPositions: field.ArrayPositions(),
	}
	backIndexStoredEntries = append(backIndexStoredEntries, entry)

	return rows, backIndexStoredEntries
}
// indexField appends one term frequency row per entry of tokenFreqs to rows,
// and one back index terms entry listing those terms to
// backIndexTermsEntries. The field norm stored on each row is
// 1/sqrt(fieldLength). When includeTermVectors is set, term vectors are
// attached to each row, which may append further rows.
func (udc *UpsideDownCouch) indexField(docID []byte, includeTermVectors bool, fieldIndex uint16, fieldLength int, tokenFreqs index.TokenFrequencies, rows []IndexRow, backIndexTermsEntries []*BackIndexTermsEntry) ([]IndexRow, []*BackIndexTermsEntry) {
	fieldNorm := float32(1.0 / math.Sqrt(float64(fieldLength)))

	// All rows for this field live in one backing array.
	termFreqRows := make([]TermFrequencyRow, len(tokenFreqs))
	terms := make([]string, 0, len(tokenFreqs))

	for term, tf := range tokenFreqs {
		// terms grows by one per iteration, so its length is the next slot.
		tfr := &termFreqRows[len(terms)]
		InitTermFrequencyRow(tfr, tf.Term, fieldIndex, docID,
			uint64(frequencyFromTokenFreq(tf)), fieldNorm)

		if includeTermVectors {
			tfr.vectors, rows = udc.termVectorsFromTokenFreq(fieldIndex, tf, rows)
		}

		// record the back index entry
		terms = append(terms, term)
		rows = append(rows, tfr)
	}

	entry := &BackIndexTermsEntry{Field: proto.Uint32(uint32(fieldIndex)), Terms: terms}
	return rows, append(backIndexTermsEntries, entry)
}
// Delete removes the document with the given id from the index.
//
// It looks up the document's back index row; when there is none the call
// only bumps the delete counter. Otherwise every row recorded in the back
// index (and the back index row itself) is deleted in one batch, and on
// success the document count is decremented.
func (udc *UpsideDownCouch) Delete(id string) (err error) {
	indexStart := time.Now()

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	// open a reader for the back index lookup
	var kvreader store.KVReader
	if kvreader, err = udc.store.Reader(); err != nil {
		return err
	}

	// look up the back index row for the doc id, if it exists
	var backIndexRow *BackIndexRow
	backIndexRow, err = backIndexRowForDoc(kvreader, index.IndexInternalID(id))
	if err != nil {
		_ = kvreader.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return err
	}

	if err = kvreader.Close(); err != nil {
		return err
	}

	if backIndexRow == nil {
		// nothing is stored for this id
		atomic.AddUint64(&udc.stats.deletes, 1)
		return err
	}

	// start a writer for this delete
	var kvwriter store.KVWriter
	if kvwriter, err = udc.store.Writer(); err != nil {
		return err
	}
	defer func() {
		cerr := kvwriter.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	var deleteRowsAll [][]UpsideDownCouchRow
	if deleteRows := udc.deleteSingle(id, backIndexRow, nil); len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	err = udc.batchRows(kvwriter, nil, nil, deleteRowsAll)
	if err == nil {
		udc.m.Lock()
		udc.docCount--
		udc.m.Unlock()
	}
	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err != nil {
		atomic.AddUint64(&udc.stats.errors, 1)
		return err
	}
	atomic.AddUint64(&udc.stats.deletes, 1)
	return nil
}
// deleteSingle appends to deleteRows the rows that must be removed to delete
// document id: one term frequency row per term in the back index, one stored
// row per stored entry, and finally the back index row itself. The rows are
// built with placeholder values. It returns the extended slice.
func (udc *UpsideDownCouch) deleteSingle(id string, backIndexRow *BackIndexRow, deleteRows []UpsideDownCouchRow) []UpsideDownCouchRow {
	docID := []byte(id)

	for _, termsEntry := range backIndexRow.termsEntries {
		for _, term := range termsEntry.Terms {
			deleteRows = append(deleteRows,
				NewTermFrequencyRow([]byte(term), uint16(*termsEntry.Field), docID, 0, 0))
		}
	}

	for _, storedEntry := range backIndexRow.storedEntries {
		deleteRows = append(deleteRows,
			NewStoredRow(docID, uint16(*storedEntry.Field), storedEntry.ArrayPositions, 'x', nil))
	}

	// also delete the back entry itself
	return append(deleteRows, backIndexRow)
}
// decodeFieldType rebuilds a document field from its stored form. typ selects
// the field constructor: 't' text, 'n' numeric, 'd' date/time, 'b' boolean,
// 'g' geo point, 'i' IP. Any other type byte yields nil.
func decodeFieldType(typ byte, name string, pos []uint64, value []byte) document.Field {
	switch typ {
	case 't':
		return document.NewTextField(name, pos, value)
	case 'n':
		return document.NewNumericFieldFromBytes(name, pos, value)
	case 'd':
		return document.NewDateTimeFieldFromBytes(name, pos, value)
	case 'b':
		return document.NewBooleanFieldFromBytes(name, pos, value)
	case 'g':
		return document.NewGeoPointFieldFromBytes(name, pos, value)
	case 'i':
		return document.NewIPFieldFromBytes(name, pos, value)
	default:
		return nil
	}
}
// frequencyFromTokenFreq returns the frequency reported by tf.
func frequencyFromTokenFreq(tf *index.TokenFreq) int {
	return tf.Frequency()
}
// termVectorsFromTokenFreq builds one TermVector per location in tf.
//
// A location that names its own field is resolved through
// fieldIndexOrNewRow, and any new field row that produces is appended to
// rows; locations without a field name use the given field index. It returns
// the vectors together with the possibly extended rows.
func (udc *UpsideDownCouch) termVectorsFromTokenFreq(field uint16, tf *index.TokenFreq, rows []IndexRow) ([]*TermVector, []IndexRow) {
	count := len(tf.Locations)
	// One backing array holds all vectors; the result points into it.
	backing := make([]TermVector, count)
	vectors := make([]*TermVector, count)

	for i, loc := range tf.Locations {
		fieldIndex := field
		if loc.Field != "" {
			// lookup correct field
			var newFieldRow *FieldRow
			fieldIndex, newFieldRow = udc.fieldIndexOrNewRow(loc.Field)
			if newFieldRow != nil {
				rows = append(rows, newFieldRow)
			}
		}

		tv := &backing[i]
		tv.field = fieldIndex
		tv.arrayPositions = loc.ArrayPositions
		tv.pos = uint64(loc.Position)
		tv.start = uint64(loc.Start)
		tv.end = uint64(loc.End)
		vectors[i] = tv
	}

	return vectors, rows
}
// termFieldVectorsFromTermVectors converts term vectors into
// index.TermFieldVector values, taking each field name from the field cache.
// It returns nil for an empty input.
func (udc *UpsideDownCouch) termFieldVectorsFromTermVectors(in []*TermVector) []*index.TermFieldVector {
	count := len(in)
	if count == 0 {
		return nil
	}

	// One backing array holds all vectors; the result points into it.
	backing := make([]index.TermFieldVector, count)
	out := make([]*index.TermFieldVector, count)
	for i, tv := range in {
		tfv := &backing[i]
		tfv.Field = udc.fieldCache.FieldIndexed(tv.field)
		tfv.ArrayPositions = tv.arrayPositions
		tfv.Pos = tv.pos
		tfv.Start = tv.start
		tfv.End = tv.end
		out[i] = tfv
	}
	return out
}
// Batch applies every index and internal operation in batch as a single KV
// batch.
//
// Documents to index are analyzed on the analysis queue while, concurrently,
// the back index row of every document ID in the batch is looked up. A nil
// document in batch.IndexOps is a delete of that ID. Internal operations with
// a nil value are deletes, the rest are updates. On success the document
// count and the update/delete/batch counters are advanced; on failure the
// error counter is incremented.
//
// If the batch carries a persisted callback, it is invoked when Batch returns
// with the error Batch is returning (nil on success).
func (udc *UpsideDownCouch) Batch(batch *index.Batch) (err error) {
	persistedCallback := batch.PersistedCallback()
	if persistedCallback != nil {
		// Wrap the call in a closure: arguments of a deferred call are
		// evaluated when the defer statement executes, so deferring
		// persistedCallback(err) directly would always pass nil.
		defer func() {
			persistedCallback(err)
		}()
	}
	analysisStart := time.Now()

	resultChan := make(chan *AnalysisResult, len(batch.IndexOps))

	var numUpdates uint64
	var numPlainTextBytes uint64
	for _, doc := range batch.IndexOps {
		if doc != nil {
			numUpdates++
			numPlainTextBytes += doc.NumPlainTextBytes()
		}
	}

	if numUpdates > 0 {
		go func() {
			for k := range batch.IndexOps {
				doc := batch.IndexOps[k]
				if doc != nil {
					// put the work on the queue
					udc.analysisQueue.Queue(func() {
						ar := udc.analyze(doc)
						resultChan <- ar
					})
				}
			}
		}()
	}

	// retrieve back index rows concurrent with analysis
	// docBackIndexRowErr is written only by the goroutine below and read
	// only after docBackIndexRowCh has been closed and drained.
	docBackIndexRowErr := error(nil)
	docBackIndexRowCh := make(chan *docBackIndexRow, len(batch.IndexOps))

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	go func() {
		defer close(docBackIndexRowCh)

		// open a reader for backindex lookup; errors stay local to this
		// goroutine and are reported through docBackIndexRowErr
		kvreader, rerr := udc.store.Reader()
		if rerr != nil {
			docBackIndexRowErr = rerr
			return
		}
		defer func() {
			// keep an earlier lookup error rather than masking it
			if cerr := kvreader.Close(); cerr != nil && docBackIndexRowErr == nil {
				docBackIndexRowErr = cerr
			}
		}()

		for docID, doc := range batch.IndexOps {
			backIndexRow, lerr := backIndexRowForDoc(kvreader, index.IndexInternalID(docID))
			if lerr != nil {
				docBackIndexRowErr = lerr
				return
			}

			docBackIndexRowCh <- &docBackIndexRow{docID, doc, backIndexRow}
		}
	}()

	// wait for analysis result
	newRowsMap := make(map[string][]IndexRow)
	var itemsDeQueued uint64
	for itemsDeQueued < numUpdates {
		result := <-resultChan
		newRowsMap[result.DocID] = result.Rows
		itemsDeQueued++
	}
	close(resultChan)

	atomic.AddUint64(&udc.stats.analysisTime, uint64(time.Since(analysisStart)))

	docsAdded := uint64(0)
	docsDeleted := uint64(0)

	indexStart := time.Now()

	// prepare a list of rows
	var addRowsAll [][]UpsideDownCouchRow
	var updateRowsAll [][]UpsideDownCouchRow
	var deleteRowsAll [][]UpsideDownCouchRow

	// add the internal ops
	var updateRows []UpsideDownCouchRow
	var deleteRows []UpsideDownCouchRow

	for internalKey, internalValue := range batch.InternalOps {
		if internalValue == nil {
			// delete
			deleteInternalRow := NewInternalRow([]byte(internalKey), nil)
			deleteRows = append(deleteRows, deleteInternalRow)
		} else {
			updateInternalRow := NewInternalRow([]byte(internalKey), internalValue)
			updateRows = append(updateRows, updateInternalRow)
		}
	}

	if len(updateRows) > 0 {
		updateRowsAll = append(updateRowsAll, updateRows)
	}
	if len(deleteRows) > 0 {
		deleteRowsAll = append(deleteRowsAll, deleteRows)
	}

	// process back index rows as they arrive
	for dbir := range docBackIndexRowCh {
		if dbir.doc == nil && dbir.backIndexRow != nil {
			// delete
			docDeleteRows := udc.deleteSingle(dbir.docID, dbir.backIndexRow, nil)
			if len(docDeleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, docDeleteRows)
			}
			docsDeleted++
		} else if dbir.doc != nil {
			docAddRows, docUpdateRows, docDeleteRows := udc.mergeOldAndNew(dbir.backIndexRow, newRowsMap[dbir.docID])
			if len(docAddRows) > 0 {
				addRowsAll = append(addRowsAll, docAddRows)
			}
			if len(docUpdateRows) > 0 {
				updateRowsAll = append(updateRowsAll, docUpdateRows)
			}
			if len(docDeleteRows) > 0 {
				deleteRowsAll = append(deleteRowsAll, docDeleteRows)
			}
			if dbir.backIndexRow == nil {
				docsAdded++
			}
		}
	}

	if docBackIndexRowErr != nil {
		// count the failure, as Update and Delete do for the same lookup
		atomic.AddUint64(&udc.stats.errors, 1)
		return docBackIndexRowErr
	}

	// start a writer for this batch
	var kvwriter store.KVWriter
	kvwriter, err = udc.store.Writer()
	if err != nil {
		return
	}

	err = udc.batchRows(kvwriter, addRowsAll, updateRowsAll, deleteRowsAll)
	if err != nil {
		_ = kvwriter.Close()
		atomic.AddUint64(&udc.stats.errors, 1)
		return
	}

	err = kvwriter.Close()

	atomic.AddUint64(&udc.stats.indexTime, uint64(time.Since(indexStart)))

	if err == nil {
		udc.m.Lock()
		udc.docCount += docsAdded
		udc.docCount -= docsDeleted
		udc.m.Unlock()
		atomic.AddUint64(&udc.stats.updates, numUpdates)
		atomic.AddUint64(&udc.stats.deletes, docsDeleted)
		atomic.AddUint64(&udc.stats.batches, 1)
		atomic.AddUint64(&udc.stats.numPlainTextBytesIndexed, numPlainTextBytes)
	} else {
		atomic.AddUint64(&udc.stats.errors, 1)
	}

	return
}
// SetInternal stores val under key as an internal row in the underlying KV
// store. The write is serialized with other writers through writeMutex and is
// executed as a single-operation batch. When the batch succeeds but closing
// the writer fails, the close error is returned.
func (udc *UpsideDownCouch) SetInternal(key, val []byte) (err error) {
	row := NewInternalRow(key, val)

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	var kvw store.KVWriter
	if kvw, err = udc.store.Writer(); err != nil {
		return err
	}
	// Surface a Close failure only when nothing else has failed already.
	defer func() {
		closeErr := kvw.Close()
		if closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	ops := kvw.NewBatch()
	ops.Set(row.Key(), row.Value())
	err = kvw.ExecuteBatch(ops)
	return err
}
// DeleteInternal removes the internal row stored under key from the
// underlying KV store. The write is serialized with other writers through
// writeMutex and is executed as a single-operation batch. When the batch
// succeeds but closing the writer fails, the close error is returned.
func (udc *UpsideDownCouch) DeleteInternal(key []byte) (err error) {
	// Only the key of the row is needed, so no value is supplied.
	row := NewInternalRow(key, nil)

	udc.writeMutex.Lock()
	defer udc.writeMutex.Unlock()

	var kvw store.KVWriter
	if kvw, err = udc.store.Writer(); err != nil {
		return err
	}
	// Surface a Close failure only when nothing else has failed already.
	defer func() {
		closeErr := kvw.Close()
		if closeErr != nil && err == nil {
			err = closeErr
		}
	}()

	ops := kvw.NewBatch()
	ops.Delete(row.Key())
	err = kvw.ExecuteBatch(ops)
	return err
}
// Reader opens a reader on the underlying KV store and wraps it in an
// IndexReader that also carries the document count observed at the time of
// the call. A failure to open the store reader is returned as a formatted
// error.
func (udc *UpsideDownCouch) Reader() (index.IndexReader, error) {
	kvreader, openErr := udc.store.Reader()
	if openErr != nil {
		return nil, fmt.Errorf("error opening store reader: %v", openErr)
	}

	// docCount is guarded by udc.m; copy it while holding the read lock.
	udc.m.RLock()
	count := udc.docCount
	udc.m.RUnlock()

	return &IndexReader{index: udc, kvreader: kvreader, docCount: count}, nil
}
// Stats exposes the index's statistics holder as a json.Marshaler.
func (udc *UpsideDownCouch) Stats() json.Marshaler {
	var m json.Marshaler = udc.stats
	return m
}
// StatsMap returns the index statistics in map form, as produced by the
// statistics holder's statsMap method.
func (udc *UpsideDownCouch) StatsMap() map[string]interface{} {
	holder := udc.stats
	return holder.statsMap()
}
// Advanced gives callers direct access to the KV store backing this index.
// The returned error is always nil.
func (udc *UpsideDownCouch) Advanced() (store.KVStore, error) {
	kv := udc.store
	return kv, nil
}
// fieldIndexOrNewRow looks name up in the field cache (passing true as the
// second argument to FieldNamed) and returns its numeric index. When the
// cache reports that the field did not exist before this call, a FieldRow
// describing it is returned as well; otherwise the row is nil.
func (udc *UpsideDownCouch) fieldIndexOrNewRow(name string) (uint16, *FieldRow) {
	fieldIdx, known := udc.fieldCache.FieldNamed(name, true)
	if known {
		return fieldIdx, nil
	}
	return fieldIdx, NewFieldRow(fieldIdx, name)
}
// init registers NewUpsideDownCouch with the registry under Name and panics
// if the registration is rejected.
func init() {
	if regErr := registry.RegisterIndexType(Name, NewUpsideDownCouch); regErr != nil {
		panic(regErr)
	}
}
// backIndexRowForDoc fetches the back index row stored for docID through
// kvreader. It returns (nil, nil) when the store holds no value for the
// document's back index key, and a non-nil error when building the key,
// reading the store, or decoding the row fails.
func backIndexRowForDoc(kvreader store.KVReader, docID index.IndexInternalID) (*BackIndexRow, error) {
	// A throwaway row carrying only the doc ID is enough to derive the key.
	probe := BackIndexRow{doc: docID}

	// Borrow a scratch buffer for the key, growing it when it is too small.
	scratch := GetRowBuffer()
	if need := probe.KeySize(); need > len(scratch.buf) {
		scratch.buf = make([]byte, 2*need)
	}
	defer PutRowBuffer(scratch)

	keyLen, err := probe.KeyTo(scratch.buf)
	if err != nil {
		return nil, err
	}
	key := scratch.buf[:keyLen]

	raw, err := kvreader.Get(key)
	switch {
	case err != nil:
		return nil, err
	case raw == nil:
		// no back index entry exists for this document
		return nil, nil
	}

	row, err := NewBackIndexRowKV(key, raw)
	if err != nil {
		return nil, err
	}
	return row, nil
}
================================================
FILE: index/upsidedown/upsidedown.pb.go
================================================
// Code generated by protoc-gen-go. DO NOT EDIT.
// versions:
// protoc-gen-go v1.36.6
// protoc v5.29.3
// source: index/upsidedown/upsidedown.proto
package upsidedown
import (
fmt "fmt"
protoreflect "google.golang.org/protobuf/reflect/protoreflect"
protoimpl "google.golang.org/protobuf/runtime/protoimpl"
io "io"
reflect "reflect"
sync "sync"
unsafe "unsafe"
)
const (
// Verify that this generated code is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(20 - protoimpl.MinVersion)
// Verify that runtime/protoimpl is sufficiently up-to-date.
_ = protoimpl.EnforceVersion(protoimpl.MaxVersion - 20)
)
var (
// ErrInvalidLengthUpsidedown is returned by skipUpsidedown when a
// length-delimited field declares a negative length.
ErrInvalidLengthUpsidedown = fmt.Errorf("proto: negative length found during unmarshaling")
)
// BackIndexTermsEntry is the Go form of the BackIndexTermsEntry protobuf
// message: a required numeric field value paired with a repeated list of
// term strings.
type BackIndexTermsEntry struct {
state protoimpl.MessageState `protogen:"open.v1"`
// Field is required (protobuf field 1); MarshalTo fails when it is nil.
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
// Terms is the repeated string field 2.
Terms []string `protobuf:"bytes,2,rep,name=terms" json:"terms,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with a zero BackIndexTermsEntry and then stores the
// message info for message type index 0 in the message state.
func (x *BackIndexTermsEntry) Reset() {
*x = BackIndexTermsEntry{}
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[0]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form of the message as produced by
// protoimpl.X.MessageStringOf.
func (x *BackIndexTermsEntry) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*BackIndexTermsEntry) ProtoMessage() {}
// ProtoReflect returns the reflective view of the message. For a non-nil
// receiver it returns the message state, first storing the message info for
// type index 0 if none has been stored yet; for a nil receiver it returns
// mi.MessageOf(x).
func (x *BackIndexTermsEntry) ProtoReflect() protoreflect.Message {
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[0]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
// attach the message info lazily on first use
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Descriptor returns the gzip-compressed raw file descriptor together with
// the index path []int{0} used for this message.
//
// Deprecated: Use BackIndexTermsEntry.ProtoReflect.Descriptor instead.
func (*BackIndexTermsEntry) Descriptor() ([]byte, []int) {
return file_index_upsidedown_upsidedown_proto_rawDescGZIP(), []int{0}
}
// GetField returns the value Field points to, or 0 when either the receiver
// or the Field pointer is nil.
func (x *BackIndexTermsEntry) GetField() uint32 {
	if x == nil || x.Field == nil {
		return 0
	}
	return *x.Field
}
// GetTerms returns the Terms slice, or nil when the receiver is nil.
func (x *BackIndexTermsEntry) GetTerms() []string {
	if x == nil {
		return nil
	}
	return x.Terms
}
// MarshalTo writes the protobuf wire encoding of x into data, starting at
// offset 0, and returns the number of bytes written. It returns an error when
// the required Field is nil. data is not bounds-checked here; it must be
// large enough to hold the encoding (Size reports the encoded length of a
// message whose Field is set).
func (x *BackIndexTermsEntry) MarshalTo(data []byte) (n int, err error) {
	if x.Field == nil {
		return 0, fmt.Errorf("missing required `Field`")
	}

	// field 1, varint
	data[0] = 0x8
	pos := encodeVarintUpsidedown(data, 1, uint64(*x.Field))

	// field 2, length-delimited: one tag/length/bytes triple per term
	for _, term := range x.Terms {
		data[pos] = 0x12
		pos = encodeVarintUpsidedown(data, pos+1, uint64(len(term)))
		pos += copy(data[pos:], term)
	}
	return pos, nil
}
// Size returns the number of bytes the wire encoding of x occupies. A nil
// Field contributes nothing to the total.
func (x *BackIndexTermsEntry) Size() (n int) {
	if x.Field != nil {
		// one tag byte plus the varint-encoded value
		n += 1 + sovUpsidedown(uint64(*x.Field))
	}
	for _, term := range x.Terms {
		// one tag byte, the varint-encoded length, and the bytes themselves
		termLen := len(term)
		n += 1 + termLen + sovUpsidedown(uint64(termLen))
	}
	return n
}
// BackIndexStoreEntry is the Go form of the BackIndexStoreEntry protobuf
// message: a required numeric field value paired with a repeated list of
// uint64 array positions.
type BackIndexStoreEntry struct {
state protoimpl.MessageState `protogen:"open.v1"`
// Field is required (protobuf field 1); MarshalTo fails when it is nil.
Field *uint32 `protobuf:"varint,1,req,name=field" json:"field,omitempty"`
// ArrayPositions is the repeated varint field 2.
ArrayPositions []uint64 `protobuf:"varint,2,rep,name=arrayPositions" json:"arrayPositions,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with a zero BackIndexStoreEntry and then stores the
// message info for message type index 1 in the message state.
func (x *BackIndexStoreEntry) Reset() {
*x = BackIndexStoreEntry{}
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[1]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form of the message as produced by
// protoimpl.X.MessageStringOf.
func (x *BackIndexStoreEntry) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*BackIndexStoreEntry) ProtoMessage() {}
// ProtoReflect returns the reflective view of the message. For a non-nil
// receiver it returns the message state, first storing the message info for
// type index 1 if none has been stored yet; for a nil receiver it returns
// mi.MessageOf(x).
func (x *BackIndexStoreEntry) ProtoReflect() protoreflect.Message {
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[1]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
// attach the message info lazily on first use
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Descriptor returns the gzip-compressed raw file descriptor together with
// the index path []int{1} used for this message.
//
// Deprecated: Use BackIndexStoreEntry.ProtoReflect.Descriptor instead.
func (*BackIndexStoreEntry) Descriptor() ([]byte, []int) {
return file_index_upsidedown_upsidedown_proto_rawDescGZIP(), []int{1}
}
// GetField returns the value Field points to, or 0 when either the receiver
// or the Field pointer is nil.
func (x *BackIndexStoreEntry) GetField() uint32 {
	if x == nil || x.Field == nil {
		return 0
	}
	return *x.Field
}
// GetArrayPositions returns the ArrayPositions slice, or nil when the
// receiver is nil.
func (x *BackIndexStoreEntry) GetArrayPositions() []uint64 {
	if x == nil {
		return nil
	}
	return x.ArrayPositions
}
// MarshalTo writes the protobuf wire encoding of x into data, starting at
// offset 0, and returns the number of bytes written. It returns an error when
// the required Field is nil. data is not bounds-checked here; it must be
// large enough to hold the encoding (Size reports the encoded length of a
// message whose Field is set).
func (x *BackIndexStoreEntry) MarshalTo(data []byte) (n int, err error) {
	if x.Field == nil {
		return 0, fmt.Errorf("missing required `Field`")
	}

	// field 1, varint
	data[0] = 0x8
	pos := encodeVarintUpsidedown(data, 1, uint64(*x.Field))

	// field 2, varint: each array position is written with its own tag
	for _, arrayPos := range x.ArrayPositions {
		data[pos] = 0x10
		pos = encodeVarintUpsidedown(data, pos+1, arrayPos)
	}
	return pos, nil
}
// Size returns the number of bytes the wire encoding of x occupies. A nil
// Field contributes nothing to the total.
func (x *BackIndexStoreEntry) Size() (n int) {
	if x.Field != nil {
		// one tag byte plus the varint-encoded value
		n += 1 + sovUpsidedown(uint64(*x.Field))
	}
	for _, arrayPos := range x.ArrayPositions {
		// one tag byte plus the varint-encoded position
		n += 1 + sovUpsidedown(arrayPos)
	}
	return n
}
// BackIndexRowValue is the Go form of the BackIndexRowValue protobuf message.
// It holds two repeated nested messages: terms entries and stored entries.
type BackIndexRowValue struct {
state protoimpl.MessageState `protogen:"open.v1"`
// TermsEntries is the repeated message field 1.
TermsEntries []*BackIndexTermsEntry `protobuf:"bytes,1,rep,name=termsEntries" json:"termsEntries,omitempty"`
// StoredEntries is the repeated message field 2.
StoredEntries []*BackIndexStoreEntry `protobuf:"bytes,2,rep,name=storedEntries" json:"storedEntries,omitempty"`
unknownFields protoimpl.UnknownFields
sizeCache protoimpl.SizeCache
}
// Reset overwrites *x with a zero BackIndexRowValue and then stores the
// message info for message type index 2 in the message state.
func (x *BackIndexRowValue) Reset() {
*x = BackIndexRowValue{}
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[2]
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
ms.StoreMessageInfo(mi)
}
// String returns the string form of the message as produced by
// protoimpl.X.MessageStringOf.
func (x *BackIndexRowValue) String() string {
return protoimpl.X.MessageStringOf(x)
}
// ProtoMessage is an empty marker method.
func (*BackIndexRowValue) ProtoMessage() {}
// ProtoReflect returns the reflective view of the message. For a non-nil
// receiver it returns the message state, first storing the message info for
// type index 2 if none has been stored yet; for a nil receiver it returns
// mi.MessageOf(x).
func (x *BackIndexRowValue) ProtoReflect() protoreflect.Message {
mi := &file_index_upsidedown_upsidedown_proto_msgTypes[2]
if x != nil {
ms := protoimpl.X.MessageStateOf(protoimpl.Pointer(x))
// attach the message info lazily on first use
if ms.LoadMessageInfo() == nil {
ms.StoreMessageInfo(mi)
}
return ms
}
return mi.MessageOf(x)
}
// Descriptor returns the gzip-compressed raw file descriptor together with
// the index path []int{2} used for this message.
//
// Deprecated: Use BackIndexRowValue.ProtoReflect.Descriptor instead.
func (*BackIndexRowValue) Descriptor() ([]byte, []int) {
return file_index_upsidedown_upsidedown_proto_rawDescGZIP(), []int{2}
}
// GetTermsEntries returns the TermsEntries slice, or nil when the receiver
// is nil.
func (x *BackIndexRowValue) GetTermsEntries() []*BackIndexTermsEntry {
	if x == nil {
		return nil
	}
	return x.TermsEntries
}
// GetStoredEntries returns the StoredEntries slice, or nil when the receiver
// is nil.
func (x *BackIndexRowValue) GetStoredEntries() []*BackIndexStoreEntry {
	if x == nil {
		return nil
	}
	return x.StoredEntries
}
// MarshalTo writes the protobuf wire encoding of x into data, starting at
// offset 0, and returns the number of bytes written. Every nested entry is
// emitted as tag, varint length (taken from the entry's Size) and the entry's
// own MarshalTo output. The first error reported by a nested entry aborts the
// encoding and is returned with a byte count of 0. data is not bounds-checked
// here; it must be large enough to hold the encoding.
func (x *BackIndexRowValue) MarshalTo(data []byte) (n int, err error) {
	pos := 0

	// field 1, length-delimited: terms entries
	for _, entry := range x.TermsEntries {
		data[pos] = 0xa
		pos = encodeVarintUpsidedown(data, pos+1, uint64(entry.Size()))
		written, entryErr := entry.MarshalTo(data[pos:])
		if entryErr != nil {
			return 0, entryErr
		}
		pos += written
	}

	// field 2, length-delimited: stored entries
	for _, entry := range x.StoredEntries {
		data[pos] = 0x12
		pos = encodeVarintUpsidedown(data, pos+1, uint64(entry.Size()))
		written, entryErr := entry.MarshalTo(data[pos:])
		if entryErr != nil {
			return 0, entryErr
		}
		pos += written
	}
	return pos, nil
}
// Size returns the number of bytes the wire encoding of x occupies: for each
// nested entry, one tag byte, the varint-encoded entry length and the entry
// itself.
func (x *BackIndexRowValue) Size() (n int) {
	for _, entry := range x.TermsEntries {
		entryLen := entry.Size()
		n += 1 + entryLen + sovUpsidedown(uint64(entryLen))
	}
	for _, entry := range x.StoredEntries {
		entryLen := entry.Size()
		n += 1 + entryLen + sovUpsidedown(uint64(entryLen))
	}
	return n
}
// skipUpsidedown measures the first protobuf field encoded at the start of
// data and returns how many bytes (tag plus payload) it occupies, so that a
// decoder can step over a field it does not want to interpret.
//
// It returns io.ErrUnexpectedEOF when data is empty or ends before the field
// is complete, ErrInvalidLengthUpsidedown when a length-delimited field
// declares a negative length or one that overflows the offset, and a
// formatted error for wire types other than 0 through 5.
func skipUpsidedown(data []byte) (n int, err error) {
	l := len(data)
	iNdEx := 0
	for iNdEx < l {
		// Decode the field tag; its low three bits select the wire type.
		var wire uint64
		for shift := uint(0); ; shift += 7 {
			if iNdEx >= l {
				return 0, io.ErrUnexpectedEOF
			}
			b := data[iNdEx]
			iNdEx++
			wire |= (uint64(b) & 0x7F) << shift
			if b < 0x80 {
				break
			}
		}
		wireType := int(wire & 0x7)
		switch wireType {
		case 0:
			// Varint: consume bytes up to and including the first one
			// without the continuation bit.
			for {
				if iNdEx >= l {
					return 0, io.ErrUnexpectedEOF
				}
				iNdEx++
				if data[iNdEx-1] < 0x80 {
					break
				}
			}
			return iNdEx, nil
		case 1:
			// Fixed 64-bit value; reject payloads that run past the buffer.
			iNdEx += 8
			if iNdEx > l {
				return 0, io.ErrUnexpectedEOF
			}
			return iNdEx, nil
		case 2:
			// Length-delimited: a varint length followed by that many bytes.
			var length int
			for shift := uint(0); ; shift += 7 {
				if iNdEx >= l {
					return 0, io.ErrUnexpectedEOF
				}
				b := data[iNdEx]
				iNdEx++
				length |= (int(b) & 0x7F) << shift
				if b < 0x80 {
					break
				}
			}
			// Validate the length before using it, then guard against the
			// addition wrapping around and against a truncated payload.
			if length < 0 {
				return 0, ErrInvalidLengthUpsidedown
			}
			iNdEx += length
			if iNdEx < 0 {
				return 0, ErrInvalidLengthUpsidedown
			}
			if iNdEx > l {
				return 0, io.ErrUnexpectedEOF
			}
			return iNdEx, nil
		case 3:
			// Start of a group: skip nested fields one at a time until a tag
			// with the end-group wire type (4) is read.
			for {
				var innerWire uint64
				start := iNdEx
				for shift := uint(0); ; shift += 7 {
					if iNdEx >= l {
						return 0, io.ErrUnexpectedEOF
					}
					b := data[iNdEx]
					iNdEx++
					innerWire |= (uint64(b) & 0x7F) << shift
					if b < 0x80 {
						break
					}
				}
				innerWireType := int(innerWire & 0x7)
				if innerWireType == 4 {
					break
				}
				// Not the end of the group: rewind to the nested field's tag
				// and skip the whole field recursively.
				next, err := skipUpsidedown(data[start:])
				if err != nil {
					return 0, err
				}
				iNdEx = start + next
			}
			return iNdEx, nil
		case 4:
			// End-group tag: nothing follows the tag itself.
			return iNdEx, nil
		case 5:
			// Fixed 32-bit value; reject payloads that run past the buffer.
			iNdEx += 4
			if iNdEx > l {
				return 0, io.ErrUnexpectedEOF
			}
			return iNdEx, nil
		default:
			return 0, fmt.Errorf("proto: illegal wireType %d", wireType)
		}
	}
	// Every switch arm returns, so this point is reached only when data is
	// empty; report that as truncated input instead of panicking.
	return 0, io.ErrUnexpectedEOF
}
// sovUpsidedown returns the number of bytes needed to encode x as a protobuf
// varint (7 payload bits per byte). The result is at least 1, even for 0.
func sovUpsidedown(x uint64) (n int) {
	n = 1
	for rest := x >> 7; rest != 0; rest >>= 7 {
		n++
	}
	return n
}
// encodeVarintUpsidedown writes v into data as a protobuf varint beginning at
// offset and returns the offset just past the last byte written. data is not
// bounds-checked; it must have room for the whole encoding.
func encodeVarintUpsidedown(data []byte, offset int, v uint64) int {
	// Emit 7 bits at a time, low bits first, flagging every byte that has a
	// successor with the continuation bit.
	for ; v >= 0x80; v >>= 7 {
		data[offset] = byte(v) | 0x80
		offset++
	}
	data[offset] = byte(v)
	return offset + 1
}
// File_index_upsidedown_upsidedown_proto holds the file descriptor built by
// file_index_upsidedown_upsidedown_proto_init; it is nil until that function
// has run.
var File_index_upsidedown_upsidedown_proto protoreflect.FileDescriptor
// file_index_upsidedown_upsidedown_proto_rawDesc is the raw descriptor that is
// handed to the type builder and compressed by
// file_index_upsidedown_upsidedown_proto_rawDescGZIP.
const file_index_upsidedown_upsidedown_proto_rawDesc = "" +
"\n" +
"!index/upsidedown/upsidedown.proto\"A\n" +
"\x13BackIndexTermsEntry\x12\x14\n" +
"\x05field\x18\x01 \x02(\rR\x05field\x12\x14\n" +
"\x05terms\x18\x02 \x03(\tR\x05terms\"S\n" +
"\x13BackIndexStoreEntry\x12\x14\n" +
"\x05field\x18\x01 \x02(\rR\x05field\x12&\n" +
"\x0earrayPositions\x18\x02 \x03(\x04R\x0earrayPositions\"\x89\x01\n" +
"\x11BackIndexRowValue\x128\n" +
"\ftermsEntries\x18\x01 \x03(\v2\x14.BackIndexTermsEntryR\ftermsEntries\x12:\n" +
"\rstoredEntries\x18\x02 \x03(\v2\x14.BackIndexStoreEntryR\rstoredEntries"
var (
// rawDescOnce ensures the descriptor is compressed only once.
file_index_upsidedown_upsidedown_proto_rawDescOnce sync.Once
// rawDescData caches the gzip-compressed descriptor.
file_index_upsidedown_upsidedown_proto_rawDescData []byte
)
// file_index_upsidedown_upsidedown_proto_rawDescGZIP returns the
// gzip-compressed raw descriptor. The compression runs at most once (guarded
// by rawDescOnce) and the result is cached in rawDescData.
func file_index_upsidedown_upsidedown_proto_rawDescGZIP() []byte {
file_index_upsidedown_upsidedown_proto_rawDescOnce.Do(func() {
// view the descriptor string's bytes as a slice without copying them
file_index_upsidedown_upsidedown_proto_rawDescData = protoimpl.X.CompressGZIP(unsafe.Slice(unsafe.StringData(file_index_upsidedown_upsidedown_proto_rawDesc), len(file_index_upsidedown_upsidedown_proto_rawDesc)))
})
return file_index_upsidedown_upsidedown_proto_rawDescData
}
// file_index_upsidedown_upsidedown_proto_msgTypes holds one MessageInfo per
// message declared in this file; the methods of each message type index into
// it (0, 1 and 2).
var file_index_upsidedown_upsidedown_proto_msgTypes = make([]protoimpl.MessageInfo, 3)
// file_index_upsidedown_upsidedown_proto_goTypes lists the Go types of the
// messages, in the same index order. It is set to nil once the type builder
// has consumed it.
var file_index_upsidedown_upsidedown_proto_goTypes = []any{
(*BackIndexTermsEntry)(nil), // 0: BackIndexTermsEntry
(*BackIndexStoreEntry)(nil), // 1: BackIndexStoreEntry
(*BackIndexRowValue)(nil), // 2: BackIndexRowValue
}
// file_index_upsidedown_upsidedown_proto_depIdxs is the dependency index
// table passed to the type builder. It is set to nil once the type builder
// has consumed it.
var file_index_upsidedown_upsidedown_proto_depIdxs = []int32{
0, // 0: BackIndexRowValue.termsEntries:type_name -> BackIndexTermsEntry
1, // 1: BackIndexRowValue.storedEntries:type_name -> BackIndexStoreEntry
2, // [2:2] is the sub-list for method output_type
2, // [2:2] is the sub-list for method input_type
2, // [2:2] is the sub-list for extension type_name
2, // [2:2] is the sub-list for extension extendee
0, // [0:2] is the sub-list for field type_name
}
// init builds and registers the descriptor and message types of this file.
func init() { file_index_upsidedown_upsidedown_proto_init() }
// file_index_upsidedown_upsidedown_proto_init builds the file descriptor and
// message infos from the raw descriptor and the type tables, and stores the
// resulting descriptor in File_index_upsidedown_upsidedown_proto. Calls made
// after the descriptor has been set return immediately.
func file_index_upsidedown_upsidedown_proto_init() {
if File_index_upsidedown_upsidedown_proto != nil {
return
}
// x exists only so that reflection can report this package's import path.
type x struct{}
out := protoimpl.TypeBuilder{
File: protoimpl.DescBuilder{
GoPackagePath: reflect.TypeOf(x{}).PkgPath(),
RawDescriptor: unsafe.Slice(unsafe.StringData(file_index_upsidedown_upsidedown_proto_rawDesc), len(file_index_upsidedown_upsidedown_proto_rawDesc)),
NumEnums: 0,
NumMessages: 3,
NumExtensions: 0,
NumServices: 0,
},
GoTypes: file_index_upsidedown_upsidedown_proto_goTypes,
DependencyIndexes: file_index_upsidedown_upsidedown_proto_depIdxs,
MessageInfos: file_index_upsidedown_upsidedown_proto_msgTypes,
}.Build()
File_index_upsidedown_upsidedown_proto = out.File
// the builder inputs are no longer needed once the types are built
file_index_upsidedown_upsidedown_proto_goTypes = nil
file_index_upsidedown_upsidedown_proto_depIdxs = nil
}
================================================
FILE: index/upsidedown/upsidedown.proto
================================================
// BackIndexTermsEntry pairs a required numeric field value with a repeated
// list of term strings.
message BackIndexTermsEntry {
required uint32 field = 1;
repeated string terms = 2;
}
// BackIndexStoreEntry pairs a required numeric field value with a repeated
// list of uint64 array positions.
message BackIndexStoreEntry {
required uint32 field = 1;
repeated uint64 arrayPositions = 2;
}
// BackIndexRowValue groups the terms entries and the stored entries of one
// back index row value.
message BackIndexRowValue {
repeated BackIndexTermsEntry termsEntries = 1;
repeated BackIndexStoreEntry storedEntries = 2;
}
================================================
FILE: index/upsidedown/upsidedown_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package upsidedown
import (
"context"
"log"
"reflect"
"regexp"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
regexpTokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/regexp"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/null"
"github.com/blevesearch/bleve/v2/registry"
index "github.com/blevesearch/bleve_index_api"
)
// testAnalyzer is a DefaultAnalyzer whose only configured component is a
// regexp tokenizer built from the pattern `\w+`.
var testAnalyzer = &analysis.DefaultAnalyzer{
Tokenizer: regexpTokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// TestIndexOpenReopen opens a brand-new index, checks that it holds no
// documents and exactly one row, closes it, and then verifies that the same
// index can be opened and closed a second time.
func TestIndexOpenReopen(t *testing.T) {
	defer func() {
		if destroyErr := DestroyTest(); destroyErr != nil {
			t.Fatal(destroyErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}

	// a new index must not contain any documents
	var wantDocs uint64
	r, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	gotDocs, err := r.DocCount()
	if err != nil {
		t.Error(err)
	}
	if gotDocs != wantDocs {
		t.Errorf("Expected document count to be %d got %d", wantDocs, gotDocs)
	}
	if err = r.Close(); err != nil {
		t.Fatal(err)
	}

	// opening the database should have inserted a version row
	wantRows := uint64(1)
	gotRows, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if gotRows != wantRows {
		t.Errorf("expected %d rows, got: %d", wantRows, gotRows)
	}

	// close the index ...
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}

	// ... and make sure it can be opened again
	idx, err = NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}

	// final close
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestIndexInsert indexes one single-term document into an empty index and
// verifies the document count before and after, as well as the resulting
// number of rows.
func TestIndexInsert(t *testing.T) {
	defer func() {
		if destroyErr := DestroyTest(); destroyErr != nil {
			t.Fatal(destroyErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// checkDocCount opens a fresh reader, compares its document count with
	// want, and closes the reader again.
	checkDocCount := func(want uint64) {
		r, readerErr := idx.Reader()
		if readerErr != nil {
			t.Fatal(readerErr)
		}
		got, countErr := r.DocCount()
		if countErr != nil {
			t.Error(countErr)
		}
		if got != want {
			t.Errorf("Expected document count to be %d got %d", want, got)
		}
		if closeErr := r.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}

	var wantDocs uint64
	checkDocCount(wantDocs)

	d := document.NewDocument("1")
	d.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	if err = idx.Update(d); err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	wantDocs++
	checkDocCount(wantDocs)

	// should have 5 rows (1 for version, 1 for schema field, 1 for the single
	// term, 1 for the term count, and 1 for the back index entry)
	wantRows := uint64(1 + 1 + 1 + 1 + 1)
	gotRows, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if gotRows != wantRows {
		t.Errorf("expected %d rows, got: %d", wantRows, gotRows)
	}
}
// TestIndexInsertThenDelete indexes two documents that share one term,
// deletes them one after the other, and verifies the document count after
// every step as well as the number of rows left behind at the end.
func TestIndexInsertThenDelete(t *testing.T) {
	defer func() {
		if destroyErr := DestroyTest(); destroyErr != nil {
			t.Fatal(destroyErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// checkDocCount opens a fresh reader, compares its document count with
	// want, and closes the reader again.
	checkDocCount := func(want uint64) {
		r, readerErr := idx.Reader()
		if readerErr != nil {
			t.Fatal(readerErr)
		}
		got, countErr := r.DocCount()
		if countErr != nil {
			t.Error(countErr)
		}
		if got != want {
			t.Errorf("Expected document count to be %d got %d", want, got)
		}
		if closeErr := r.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}

	var wantDocs uint64
	checkDocCount(wantDocs)

	// add two documents before looking at the count again
	first := document.NewDocument("1")
	first.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	if err = idx.Update(first); err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	wantDocs++

	second := document.NewDocument("2")
	second.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	if err = idx.Update(second); err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	wantDocs++
	checkDocCount(wantDocs)

	// remove them one at a time, checking the count after each delete
	if err = idx.Delete("1"); err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	wantDocs--
	checkDocCount(wantDocs)

	if err = idx.Delete("2"); err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	wantDocs--
	checkDocCount(wantDocs)

	// should have 3 rows (1 for version, 1 for schema field, 1 for dictionary row garbage)
	wantRows := uint64(1 + 1 + 1)
	gotRows, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if gotRows != wantRows {
		t.Errorf("expected %d rows, got: %d", wantRows, gotRows)
	}
}
// TestIndexInsertThenUpdate indexes a document and then updates it twice:
// first so that it gains a term, then so that it loses one. After each of the
// two updates the total number of rows in the store is verified.
func TestIndexInsertThenUpdate(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// this update should overwrite one term, and introduce one new one
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithAnalyzer("name", []uint64{}, []byte("test fail"), testAnalyzer))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// should have 7 rows (1 for version, 1 for schema field, 2 for the two
	// terms, 2 for the term counts, and 1 for the back index entry)
	expectedLength := uint64(1 + 1 + 2 + 2 + 1)
	rowCount, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}

	// now do another update that should remove one of the terms
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("fail")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// should have 6 rows (1 for version, 1 for schema field, 1 for the
	// remaining term, 2 for the term dictionary, and 1 for the back index entry)
	expectedLength = uint64(1 + 1 + 1 + 2 + 1)
	rowCount, err = idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
}
// TestIndexInsertMultiple indexes two documents, verifies the row count,
// then closes and reopens the index and adds a third document to check that
// the document count carries over a reopen.
func TestIndexInsertMultiple(t *testing.T) {
	defer func() {
		if destroyErr := DestroyTest(); destroyErr != nil {
			t.Fatal(destroyErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}

	// indexTestDoc indexes a document with the given ID whose "name" field
	// holds the text "test", using whichever index idx currently refers to.
	indexTestDoc := func(id string) {
		d := document.NewDocument(id)
		d.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
		if updateErr := idx.Update(d); updateErr != nil {
			t.Errorf("Error updating index: %v", updateErr)
		}
	}

	var wantDocs uint64
	indexTestDoc("1")
	wantDocs++
	indexTestDoc("2")
	wantDocs++

	// should have 7 rows (1 for version, 1 for schema field, 2 for the single
	// term, 1 for the term count, and 2 for the back index entries)
	wantRows := uint64(1 + 1 + 2 + 1 + 2)
	gotRows, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if gotRows != wantRows {
		t.Errorf("expected %d rows, got: %d", wantRows, gotRows)
	}

	// close, reopen and add one more to test that counting works correctly
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
	idx, err = NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	indexTestDoc("3")
	wantDocs++

	r, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	gotDocs, err := r.DocCount()
	if err != nil {
		t.Error(err)
	}
	if gotDocs != wantDocs {
		t.Errorf("Expected document count to be %d got %d", wantDocs, gotDocs)
	}
	if err = r.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestIndexInsertWithStore indexes a document whose text field is both
// indexed and stored, verifies the document and row counts, and then reads
// the document back to check that the stored field content round-trips.
func TestIndexInsertWithStore(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64
	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	reader, err = idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err = reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// should have 6 rows (1 for version, 1 for schema field, and 1 for single term, and 1 for the stored field and 1 for the term count, and 1 for the back index entry)
	expectedLength := uint64(1 + 1 + 1 + 1 + 1 + 1)
	rowCount, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}

	// Every check from here on dereferences the value obtained by the
	// previous step, so a failure has to stop the test instead of running
	// into a nil dereference or an out-of-range index.
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	storedDocInt, err := indexReader.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	storedDoc, ok := storedDocInt.(*document.Document)
	if !ok {
		t.Fatalf("expected *document.Document, got %T", storedDocInt)
	}
	if len(storedDoc.Fields) != 1 {
		t.Fatalf("expected 1 stored field, got %d", len(storedDoc.Fields))
	}
	textField, ok := storedDoc.Fields[0].(*document.TextField)
	if !ok {
		t.Fatalf("expected text field")
	}
	if string(textField.Value()) != "test" {
		t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
	}
}
// TestIndexInternalCRUD exercises the internal key/value API of the index:
// a key that was never set reads back as nil, a value written with
// SetInternal can be read back, and after DeleteInternal the key reads as nil
// again. Each read uses a freshly opened reader.
func TestIndexInternalCRUD(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// A reader that failed to open must not be used, so Reader errors are
	// fatal throughout this test.
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// get something that doesn't exist yet
	val, err := indexReader.GetInternal([]byte("key"))
	if err != nil {
		t.Error(err)
	}
	if val != nil {
		t.Errorf("expected nil, got %s", val)
	}
	err = indexReader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// set
	err = idx.SetInternal([]byte("key"), []byte("abc"))
	if err != nil {
		t.Error(err)
	}

	indexReader2, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// get
	val, err = indexReader2.GetInternal([]byte("key"))
	if err != nil {
		t.Error(err)
	}
	if string(val) != "abc" {
		t.Errorf("expected %s, got '%s'", "abc", val)
	}
	err = indexReader2.Close()
	if err != nil {
		t.Fatal(err)
	}

	// delete
	err = idx.DeleteInternal([]byte("key"))
	if err != nil {
		t.Error(err)
	}

	indexReader3, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// get again
	val, err = indexReader3.GetInternal([]byte("key"))
	if err != nil {
		t.Error(err)
	}
	if val != nil {
		t.Errorf("expected nil, got %s", val)
	}
	err = indexReader3.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexBatch creates two documents individually and then applies one
// batch that inserts a new document, updates an existing one and deletes the
// other. It verifies that the document count is unchanged and that exactly
// the documents "2" and "3" remain.
func TestIndexBatch(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		// nothing below can work on an index that failed to open
		t.Fatalf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var expectedCount uint64

	// first create 2 docs the old fashioned way
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2")))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	// now create a batch which does 3 things
	// insert new doc
	// update existing doc
	// delete existing doc
	// net document count change 0
	batch := index.NewBatch()
	doc = document.NewDocument("3")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
	batch.Update(doc)
	doc = document.NewDocument("2")
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test2updated")))
	batch.Update(doc)
	batch.Delete("1")

	err = idx.Batch(batch)
	if err != nil {
		t.Error(err)
	}

	// A reader that failed to open must not be used (nor closed in the
	// deferred function), so this error is fatal.
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	docCount, err := indexReader.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}

	// Likewise, Next must not be called on a doc ID reader that failed to
	// open.
	docIDReader, err := indexReader.DocIDReaderAll()
	if err != nil {
		t.Fatal(err)
	}
	var docIds []index.IndexInternalID
	docID, err := docIDReader.Next()
	for docID != nil && err == nil {
		docIds = append(docIds, docID)
		docID, err = docIDReader.Next()
	}
	if err != nil {
		t.Error(err)
	}
	expectedDocIds := []index.IndexInternalID{index.IndexInternalID("2"), index.IndexInternalID("3")}
	if !reflect.DeepEqual(docIds, expectedDocIds) {
		t.Errorf("expected ids: %v, got ids: %v", expectedDocIds, docIds)
	}
}
// TestIndexInsertUpdateDeleteWithMultipleTypesStored takes a single document
// with stored text, numeric and date-time fields through insert, update
// (dropping the date field) and delete, checking document counts, the raw row
// count after the insert, and the stored field values after each write.
//
// Failures that would make the following statements panic (nil reader, wrong
// number of stored fields, failed type assertion) abort the test with
// t.Fatal so the real cause is reported instead of a panic.
func TestIndexInsertUpdateDeleteWithMultipleTypesStored(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// the freshly opened index must be empty
	var expectedCount uint64
	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err := reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
	doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, index.IndexField|index.StoreField))
	df, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, index.IndexField|index.StoreField)
	if err != nil {
		t.Fatal(err)
	}
	doc.AddField(df)
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}
	expectedCount++

	reader, err = idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err = reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}

	// expected rows (the counts below add up to 74 when each numeric/date
	// value expands to 16 terms):
	// 1 for version
	// 3 for schema fields
	// 1 for text term
	// 16 for numeric terms
	// 16 for date terms
	// 3 for the stored field
	// 1 for the text term count
	// 16 for numeric term counts
	// 16 for date term counts
	// 1 for the back index entry
	expectedLength := uint64(1 + 3 + 1 + (64 / document.DefaultPrecisionStep) + (64 / document.DefaultPrecisionStep) + 3 + 1 + (64 / document.DefaultPrecisionStep) + (64 / document.DefaultPrecisionStep) + 1)
	rowCount, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}

	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	storedDocInt, err := indexReader.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	storedDoc := storedDocInt.(*document.Document)
	err = indexReader.Close()
	if err != nil {
		t.Error(err)
	}

	if len(storedDoc.Fields) != 3 {
		t.Fatalf("expected 3 stored field, got %d", len(storedDoc.Fields))
	}
	textField, ok := storedDoc.Fields[0].(*document.TextField)
	if !ok {
		t.Fatalf("expected text field")
	}
	if string(textField.Value()) != "test" {
		t.Errorf("expected field content 'test', got '%s'", string(textField.Value()))
	}
	numField, ok := storedDoc.Fields[1].(*document.NumericField)
	if !ok {
		t.Fatalf("expected numeric field")
	}
	numFieldNumer, err := numField.Number()
	if err != nil {
		t.Error(err)
	} else if numFieldNumer != 35.99 {
		t.Errorf("expected numeric value 35.99, got %f", numFieldNumer)
	}
	dateField, ok := storedDoc.Fields[2].(*document.DateTimeField)
	if !ok {
		t.Fatalf("expected date field")
	}
	dateFieldDate, _, err := dateField.DateTime()
	if err != nil {
		t.Error(err)
	} else if dateFieldDate != time.Unix(0, 0).UTC() {
		t.Errorf("expected date value unix epoch, got %v", dateFieldDate)
	}

	// now update the document, but omit one of the fields
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testup"), index.IndexField|index.StoreField))
	doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 36.99, index.IndexField|index.StoreField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	indexReader2, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}

	// expected doc count shouldn't have changed
	docCount, err = indexReader2.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}

	// should only get 2 fields back now though
	storedDocInt, err = indexReader2.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	storedDoc = storedDocInt.(*document.Document)
	err = indexReader2.Close()
	if err != nil {
		t.Error(err)
	}

	if len(storedDoc.Fields) != 2 {
		t.Fatalf("expected 2 stored field, got %d", len(storedDoc.Fields))
	}
	textField, ok = storedDoc.Fields[0].(*document.TextField)
	if !ok {
		t.Fatalf("expected text field")
	}
	if string(textField.Value()) != "testup" {
		t.Errorf("expected field content 'testup', got '%s'", string(textField.Value()))
	}
	numField, ok = storedDoc.Fields[1].(*document.NumericField)
	if !ok {
		t.Fatalf("expected numeric field")
	}
	numFieldNumer, err = numField.Number()
	if err != nil {
		t.Error(err)
	} else if numFieldNumer != 36.99 {
		t.Errorf("expected numeric value 36.99, got %f", numFieldNumer)
	}

	// now delete the document
	err = idx.Delete("1")
	if err != nil {
		t.Errorf("Error deleting entry from index: %v", err)
	}
	expectedCount--

	// the doc count must have dropped by one after the delete
	reader, err = idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	docCount, err = reader.DocCount()
	if err != nil {
		t.Error(err)
	}
	if docCount != expectedCount {
		t.Errorf("Expected document count to be %d got %d", expectedCount, docCount)
	}
	err = reader.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestIndexInsertFields indexes one document carrying a text, a numeric and a
// date-time field, then checks that the index reader reports exactly those
// field names, in the order they were added.
func TestIndexInsertFields(t *testing.T) {
	defer func() {
		if cleanupErr := DestroyTest(); cleanupErr != nil {
			t.Fatal(cleanupErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// every field is both indexed and stored
	opts := index.IndexField | index.StoreField

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), opts))
	doc.AddField(document.NewNumericFieldWithIndexingOptions("age", []uint64{}, 35.99, opts))
	epochField, err := document.NewDateTimeFieldWithIndexingOptions("unixEpoch", []uint64{}, time.Unix(0, 0), time.RFC3339, opts)
	if err != nil {
		t.Error(err)
	}
	doc.AddField(epochField)

	if err = idx.Update(doc); err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	rdr, err := idx.Reader()
	if err != nil {
		t.Error(err)
	}
	defer func() {
		if closeErr := rdr.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	got, err := rdr.Fields()
	if err != nil {
		t.Error(err)
		return
	}
	want := []string{"name", "age", "unixEpoch"}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("expected fields: %v, got %v", want, got)
	}
}
// TestIndexUpdateComposites indexes a document with two stored text fields and
// a composite "_all" field, checks the raw row count, then updates the
// document and checks both the stored value and the row count again (the
// update leaves term dictionary rows for the old terms behind).
func TestIndexUpdateComposites(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), index.IndexField|index.StoreField))
	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, index.IndexField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// should have 15 rows
	// 1 for version
	// 3 for schema fields
	// 4 for text term
	// 2 for the stored field
	// 4 for the text term count
	// 1 for the back index entry
	expectedLength := uint64(1 + 3 + 4 + 2 + 4 + 1)
	rowCount, err := idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}

	// now lets update it
	doc = document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("testupdated"), index.IndexField|index.StoreField))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("misterupdated"), index.IndexField|index.StoreField))
	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, index.IndexField))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// Abort on reader/document errors: continuing would dereference nil.
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// make sure new values are in index
	storedDocInt, err := indexReader.Document("1")
	if err != nil {
		t.Fatal(err)
	}
	storedDoc := storedDocInt.(*document.Document)
	if len(storedDoc.Fields) != 2 {
		t.Fatalf("expected 2 stored field, got %d", len(storedDoc.Fields))
	}
	textField, ok := storedDoc.Fields[0].(*document.TextField)
	if !ok {
		t.Fatalf("expected text field")
	}
	if string(textField.Value()) != "testupdated" {
		t.Errorf("expected field content 'testupdated', got '%s'", string(textField.Value()))
	}

	// should have the same row count as before, plus 4 term dictionary garbage rows
	expectedLength += 4
	rowCount, err = idx.(*UpsideDownCouch).rowCount()
	if err != nil {
		t.Error(err)
	}
	if rowCount != expectedLength {
		t.Errorf("expected %d rows, got: %d", expectedLength, rowCount)
	}
}
// TestIndexFieldsMisc indexes a document with two text fields and checks that
// the field cache maps index 0 to "name", index 1 to "title", and the
// unassigned index 2 to the empty string.
func TestIndexFieldsMisc(t *testing.T) {
	defer func() {
		if cleanupErr := DestroyTest(); cleanupErr != nil {
			t.Fatal(cleanupErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	opts := index.IndexField | index.StoreField
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), opts))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), opts))
	if err = idx.Update(doc); err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// look the field names up by their numeric position in the cache
	cache := idx.(*UpsideDownCouch).fieldCache
	if first := cache.FieldIndexed(0); first != "name" {
		t.Errorf("expected field named 'name', got '%s'", first)
	}
	if second := cache.FieldIndexed(1); second != "title" {
		t.Errorf("expected field named 'title', got '%s'", second)
	}
	if third := cache.FieldIndexed(2); third != "" {
		t.Errorf("expected field named '', got '%s'", third)
	}
}
// TestIndexTermReaderCompositeFields indexes a document whose "name" and
// "title" text fields feed a composite "_all" field, then reads the postings
// of the term "mister" in "_all" and expects them to reference document "1".
//
// The test counts the postings it sees so that an empty result is reported as
// a failure rather than silently passing, and it closes the term field reader
// it opens.
func TestIndexTermReaderCompositeFields(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), index.IndexField|index.StoreField|index.IncludeTermVectors))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), index.IndexField|index.StoreField|index.IncludeTermVectors))
	doc.AddField(document.NewCompositeFieldWithIndexingOptions("_all", true, nil, nil, index.IndexField|index.IncludeTermVectors))
	err = idx.Update(doc)
	if err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	// Abort on reader errors: continuing would dereference nil.
	indexReader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	termFieldReader, err := indexReader.TermFieldReader(context.TODO(), []byte("mister"), "_all", true, true, true)
	if err != nil {
		t.Fatal(err)
	}
	// Runs before the deferred index reader close above.
	defer func() {
		err := termFieldReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	var matches int
	tfd, err := termFieldReader.Next(nil)
	for tfd != nil && err == nil {
		matches++
		if !tfd.ID.Equals(index.IndexInternalID("1")) {
			t.Errorf("expected to find document id 1")
		}
		tfd, err = termFieldReader.Next(nil)
	}
	if err != nil {
		t.Error(err)
	}
	if matches == 0 {
		t.Errorf("expected at least one posting for term 'mister' in field '_all', got none")
	}
}
// TestIndexDocValueReader indexes a document with "name" and "title" text
// fields and checks that visiting its doc values yields exactly one term per
// field: "test" and "mister".
func TestIndexDocValueReader(t *testing.T) {
	defer func() {
		if cleanupErr := DestroyTest(); cleanupErr != nil {
			t.Fatal(cleanupErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	opts := index.IndexField | index.StoreField | index.IncludeTermVectors
	doc := document.NewDocument("1")
	doc.AddField(document.NewTextFieldWithIndexingOptions("name", []uint64{}, []byte("test"), opts))
	doc.AddField(document.NewTextFieldWithIndexingOptions("title", []uint64{}, []byte("mister"), opts))
	if err = idx.Update(doc); err != nil {
		t.Errorf("Error updating index: %v", err)
	}

	rdr, err := idx.Reader()
	if err != nil {
		t.Error(err)
	}
	defer func() {
		if closeErr := rdr.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// collect every (field, term) pair the visitor is handed
	got := fieldTerms{}
	collect := func(fieldName string, value []byte) {
		got[fieldName] = append(got[fieldName], string(value))
	}

	dvr, err := rdr.DocValueReader([]string{"name", "title"})
	if err != nil {
		t.Error(err)
	}
	if err = dvr.VisitDocValues(index.IndexInternalID("1"), collect); err != nil {
		t.Error(err)
	}

	want := fieldTerms{
		"name":  []string{"test"},
		"title": []string{"mister"},
	}
	if !reflect.DeepEqual(got, want) {
		t.Errorf("expected field terms: %#v, got: %#v", want, got)
	}
}
// BenchmarkBatch measures applying a 100-document batch to an upside down
// index backed by the null KV store. Every document has a single "desc" text
// field holding bleveWikiArticle1K, analyzed with the standard analyzer. The
// same batch is re-applied b.N times.
//
// The index is closed when the benchmark finishes; the timer is stopped first
// so the close is not part of the measurement.
func BenchmarkBatch(b *testing.B) {
	cache := registry.NewCache()
	analyzer, err := cache.AnalyzerNamed(standard.Name)
	if err != nil {
		b.Fatal(err)
	}

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(null.Name, nil, analysisQueue)
	if err != nil {
		b.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			b.Fatal(cerr)
		}
	}()

	batch := index.NewBatch()
	for i := 0; i < 100; i++ {
		d := document.NewDocument(strconv.Itoa(i))
		f := document.NewTextFieldWithAnalyzer("desc", nil, bleveWikiArticle1K, analyzer)
		d.AddField(f)
		batch.Update(d)
	}

	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		err = idx.Batch(batch)
		if err != nil {
			b.Fatal(err)
		}
	}
	b.StopTimer()
}
// TestConcurrentUpdate issues ten concurrent updates of document "1", each
// storing a single, differently named field, and then checks that the stored
// document holds at most one field, i.e. that the updates did not interleave
// into a merged document.
func TestConcurrentUpdate(t *testing.T) {
	defer func() {
		err := DestroyTest()
		if err != nil {
			t.Fatal(err)
		}
	}()

	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// do some concurrent updates
	var wg sync.WaitGroup
	for i := 0; i < 10; i++ {
		wg.Add(1)
		go func(i int) {
			// Deferred so the WaitGroup is released on every exit path.
			defer wg.Done()
			doc := document.NewDocument("1")
			doc.AddField(document.NewTextFieldWithIndexingOptions(strconv.Itoa(i), []uint64{}, []byte(strconv.Itoa(i)), index.StoreField))
			err := idx.Update(doc)
			if err != nil {
				t.Errorf("Error updating index: %v", err)
			}
		}(i)
	}
	wg.Wait()

	// now load document "1" and see which fields survived
	// NOTE(review): log.Fatal exits the process and skips the deferred
	// cleanup above; t.Fatal would be preferable, but confirm "log" is still
	// used elsewhere in this file before switching.
	r, err := idx.Reader()
	if err != nil {
		log.Fatal(err)
	}
	docInt, err := r.Document("1")
	if err != nil {
		log.Fatal(err)
	}
	doc := docInt.(*document.Document)
	if len(doc.Fields) > 1 {
		t.Errorf("expected single field, found %d", len(doc.Fields))
	}
	err = r.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestLargeField indexes a document whose single stored text field is at
// least RowBufferSize bytes long and expects the update to succeed.
func TestLargeField(t *testing.T) {
	defer func() {
		if cleanupErr := DestroyTest(); cleanupErr != nil {
			t.Fatal(cleanupErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// repeat the sample article until the value reaches the row buffer size
	var payload []byte
	for len(payload) < RowBufferSize {
		payload = append(payload, bleveWikiArticle1K...)
	}
	t.Logf("large field size: %d", len(payload))

	largeDoc := document.NewDocument("large")
	largeDoc.AddField(document.NewTextFieldWithIndexingOptions("desc", nil, payload, index.IndexField|index.StoreField))
	if err = idx.Update(largeDoc); err != nil {
		t.Fatal(err)
	}
}
// TestIndexBatchPersistedCallbackWithErrorUpsideDown checks that a batch's
// persisted callback still fires when executing the batch is expected to
// fail. The batch's own error is deliberately ignored.
func TestIndexBatchPersistedCallbackWithErrorUpsideDown(t *testing.T) {
	defer func() {
		if cleanupErr := DestroyTest(); cleanupErr != nil {
			t.Fatal(cleanupErr)
		}
	}()

	queue := index.NewAnalysisQueue(1)
	idx, err := NewUpsideDownCouch(boltdb.Name, boltTestConfig, queue)
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Errorf("error opening index: %v", err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	fired := false
	batch := index.NewBatch()
	batch.SetPersistedCallback(func(error) {
		fired = true
	})

	// By using a really large ID, we ensure that the batch will fail,
	// because the key generated by upside down will be too large for BoltDB
	oversizedID := strings.Repeat("x", 32768+1)
	doc := document.NewDocument(oversizedID)
	doc.AddField(document.NewTextField("name", []uint64{}, []byte("test3")))
	batch.Update(doc)

	// don't fail on this error, that isn't what we're testing
	_ = idx.Batch(batch)

	if !fired {
		t.Fatal("expected callback to fire, it did not")
	}
}
// fieldTerms maps a field name to the list of terms a document uses in that
// field.
type fieldTerms map[string][]string

// FieldsNotYetCached filters fields down to the names that have no entry in
// f, preserving their order. The result is always a non-nil slice.
func (f fieldTerms) FieldsNotYetCached(fields []string) []string {
	missing := make([]string, 0, len(fields))
	for _, name := range fields {
		if _, cached := f[name]; cached {
			continue
		}
		missing = append(missing, name)
	}
	return missing
}

// Merge copies every entry of other into f.
// Term lists are treated as complete: when both maps hold the same field, the
// list from other replaces the one in f rather than being combined with it.
func (f fieldTerms) Merge(other fieldTerms) {
	for name := range other {
		f[name] = other[name]
	}
}
================================================
FILE: index.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// A Batch groups together multiple Index and Delete
// operations you would like performed at the same
// time. The Batch structure is NOT thread-safe.
// You should only perform operations on a batch
// from a single thread at a time. Once batch
// execution has started, you may not modify it.
type Batch struct {
	// index supplies the mapping used by Index and IndexSynonym.
	index Index
	// internal is the underlying batch that all operations are forwarded to.
	internal *index.Batch
	// lastDocSize is the size estimate recorded by the most recent
	// Index or IndexSynonym call.
	lastDocSize uint64
	// totalSize is the running total of the recorded size estimates.
	totalSize uint64
}
// Index maps data into a new document identified by id and queues that
// document as an update on the batch. It returns ErrorEmptyID for an empty id
// and passes through any error from mapping the document.
// NOTE: the bleve Index is not updated until the batch is executed.
func (b *Batch) Index(id string, data interface{}) error {
	if id == "" {
		return ErrorEmptyID
	}
	if notifier, isEventIndex := b.index.(index.EventIndex); isEventIndex {
		notifier.FireIndexEvent()
	}

	doc := document.NewDocument(id)
	if mapErr := b.index.Mapping().MapDocument(doc, data); mapErr != nil {
		return mapErr
	}
	b.internal.Update(doc)

	// document size plus overhead from internal (the id string)
	docSize := uint64(doc.Size() + len(id) + size.SizeOfString)
	b.lastDocSize = docSize
	b.totalSize += docSize
	return nil
}
// IndexSynonym maps a synonym definition for the given collection into a new
// synonym document identified by id and queues it as an update on the batch.
//
// It returns ErrorEmptyID for an empty id, ErrorSynonymSearchNotSupported
// when the index mapping is not a mapping.SynonymMapping, and passes through
// errors from validating the definition or mapping the document.
func (b *Batch) IndexSynonym(id string, collection string, definition *SynonymDefinition) error {
	if id == "" {
		return ErrorEmptyID
	}
	if notifier, isEventIndex := b.index.(index.EventIndex); isEventIndex {
		notifier.FireIndexEvent()
	}

	synMap, supported := b.index.Mapping().(mapping.SynonymMapping)
	if !supported {
		return ErrorSynonymSearchNotSupported
	}
	if invalid := definition.Validate(); invalid != nil {
		return invalid
	}

	doc := document.NewSynonymDocument(id)
	if mapErr := synMap.MapSynonymDocument(doc, collection, definition.Input, definition.Synonyms); mapErr != nil {
		return mapErr
	}
	b.internal.Update(doc)

	// document size plus overhead from internal (the id string)
	docSize := uint64(doc.Size() + len(id) + size.SizeOfString)
	b.lastDocSize = docSize
	b.totalSize += docSize
	return nil
}
// LastDocSize returns the size estimate recorded for the document most
// recently added through Index or IndexSynonym. It is zero for a new or
// reset batch.
func (b *Batch) LastDocSize() uint64 {
	return b.lastDocSize
}
// TotalDocsSize returns the batch's accumulated document size estimate, as
// maintained by Index, IndexSynonym, Merge and Reset.
func (b *Batch) TotalDocsSize() uint64 {
	return b.totalSize
}
// IndexAdvanced queues an already-built document as an update on the batch,
// skipping the mapping step. It returns ErrorEmptyID when the document has an
// empty ID. NOTE: the bleve Index is not updated until the batch is executed.
func (b *Batch) IndexAdvanced(doc *document.Document) (err error) {
	if docID := doc.ID(); docID == "" {
		return ErrorEmptyID
	}

	b.internal.Update(doc)
	return nil
}
// Delete queues a delete of the document with the given id on the batch.
// An empty id is ignored. NOTE: the bleve Index is not updated until the
// batch is executed.
func (b *Batch) Delete(id string) {
	if id == "" {
		return
	}
	b.internal.Delete(id)
}
// SetInternal adds the specified set internal
// operation to the batch by forwarding key and val
// to the underlying internal batch. NOTE: the bleve
// Index is not updated until the batch is executed.
func (b *Batch) SetInternal(key, val []byte) {
	b.internal.SetInternal(key, val)
}
// DeleteInternal adds the specified delete internal
// operation to the batch by forwarding key to the
// underlying internal batch. NOTE: the bleve Index
// is not updated until the batch is executed.
func (b *Batch) DeleteInternal(key []byte) {
	b.internal.DeleteInternal(key)
}
// Size reports how many operations the batch holds: the number of normal
// index operations plus the number of internal operations.
func (b *Batch) Size() int {
	indexOps := len(b.internal.IndexOps)
	internalOps := len(b.internal.InternalOps)
	return indexOps + internalOps
}
// String returns a user friendly string representation of what
// is inside this batch, as produced by the underlying internal batch.
func (b *Batch) String() string {
	return b.internal.String()
}
// Reset empties the batch so it can be reused: the underlying internal batch
// is reset and both size counters go back to zero.
func (b *Batch) Reset() {
	b.internal.Reset()
	b.lastDocSize, b.totalSize = 0, 0
}
// Merge folds the operations of o into b. A nil o, or an o without an
// internal batch, is a no-op. After merging the internal batches,
// b's last document size is taken from o when o recorded a non-zero one, and
// b's total size is recomputed from the merged internal batch.
func (b *Batch) Merge(o *Batch) {
	if o == nil || o.internal == nil {
		return
	}

	b.internal.Merge(o.internal)
	if last := o.LastDocSize(); last > 0 {
		b.lastDocSize = last
	}
	b.totalSize = uint64(b.internal.TotalDocSize())
}
// SetPersistedCallback registers f on the underlying internal batch.
// NOTE(review): presumably f is invoked once the batch has been persisted
// (or has failed to persist) — confirm against the index.Batch documentation.
func (b *Batch) SetPersistedCallback(f index.BatchCallback) {
	b.internal.SetPersistedCallback(f)
}
// PersistedCallback returns the callback currently held by the underlying
// internal batch.
func (b *Batch) PersistedCallback() index.BatchCallback {
	return b.internal.PersistedCallback()
}
// An Index implements all the indexing and searching
// capabilities of bleve. An Index can be created
// using the New() and Open() methods.
//
// Index() takes an input value, deduces a DocumentMapping for its type,
// assigns string paths to its fields or values then applies field mappings on
// them.
//
// The DocumentMapping used to index a value is deduced by the following rules:
//
//  1. If value implements mapping.bleveClassifier interface, resolve the
//     mapping from BleveType().
//  2. If value implements mapping.Classifier interface, resolve the mapping
//     from Type().
//  3. If value has a string field or value at IndexMapping.TypeField
//     (defaulting to "_type"), use it to resolve the mapping. Fields
//     addressing is described below.
//  4. If IndexMapping.DefaultType is registered, return it.
//  5. Return IndexMapping.DefaultMapping.
//
// Each field or nested field of the value is identified by a string path, then
// mapped to one or several FieldMappings which extract the result for analysis.
//
// Struct values fields are identified by their "json:" tag, or by their name.
// Nested fields are identified by prefixing with their parent identifier,
// separated by a dot.
//
// Map values entries are identified by their string key. Entries not indexed
// by strings are ignored. Entry values are identified recursively like struct
// fields.
//
// Slice and array values are identified by their field name. Their elements
// are processed sequentially with the same FieldMapping.
//
// String, float64 and time.Time values are identified by their field name.
// Other types are ignored.
//
// Each value identifier is decomposed in its parts and recursively address
// SubDocumentMappings in the tree starting at the root DocumentMapping. If a
// mapping is found, all its FieldMappings are applied to the value. If no
// mapping is found and the root DocumentMapping is dynamic, default mappings
// are used based on value type and IndexMapping default configurations.
//
// Finally, mapped values are analyzed, indexed or stored. See
// FieldMapping.Analyzer to know how an analyzer is resolved for a given field.
//
// Examples:
//
//	type Date struct {
//	  Day   string `json:"day"`
//	  Month string
//	  Year  string
//	}
//
//	type Person struct {
//	  FirstName string `json:"first_name"`
//	  LastName  string
//	  BirthDate Date `json:"birth_date"`
//	}
//
// A Person value FirstName is mapped by the SubDocumentMapping at
// "first_name". Its LastName is mapped by the one at "LastName". The day of
// BirthDate is mapped to the SubDocumentMapping "day" of the root
// SubDocumentMapping "birth_date". It will appear as the "birth_date.day"
// field in the index. The month is mapped to "birth_date.Month".
type Index interface {
	// Index analyzes, indexes or stores mapped data fields. Supplied
	// identifier is bound to analyzed data and will be retrieved by search
	// requests. See Index interface documentation for details about mapping
	// rules.
	Index(id string, data interface{}) error
	Delete(id string) error

	NewBatch() *Batch
	Batch(b *Batch) error

	// Document returns specified document or nil if the document is not
	// indexed or stored.
	Document(id string) (index.Document, error)
	// DocCount returns the number of documents in the index.
	DocCount() (uint64, error)

	Search(req *SearchRequest) (*SearchResult, error)
	SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error)

	Fields() ([]string, error)

	FieldDict(field string) (index.FieldDict, error)
	FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error)
	FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error)

	Close() error

	Mapping() mapping.IndexMapping

	Stats() *IndexStat
	StatsMap() map[string]interface{}

	GetInternal(key []byte) ([]byte, error)
	SetInternal(key, val []byte) error
	DeleteInternal(key []byte) error

	// Name returns the name of the index (by default this is the path)
	Name() string
	// SetName lets you assign your own logical name to this index
	SetName(string)

	// Advanced returns the internal index implementation
	Advanced() (index.Index, error)
}
// New creates an index at the specified path, which must not exist.
// The provided mapping will be used for all
// Index/Search operations.
// The index type and KV store are taken from Config.DefaultIndexType and
// Config.DefaultKVStore, and no kvconfig is passed.
func New(path string, mapping mapping.IndexMapping) (Index, error) {
	return newIndexUsing(path, mapping, Config.DefaultIndexType, Config.DefaultKVStore, nil)
}
// NewMemOnly creates a memory-only index.
// The contents of the index is NOT persisted,
// and will be lost once closed.
// The provided mapping will be used for all
// Index/Search operations.
// The index is created with an empty path, the upsidedown index type and
// the KV store named by Config.DefaultMemKVStore.
func NewMemOnly(mapping mapping.IndexMapping) (Index, error) {
	return newIndexUsing("", mapping, upsidedown.Name, Config.DefaultMemKVStore, nil)
}
// NewUsing creates an index at the specified path,
// which must not already exist.
// The provided mapping will be used for all
// Index/Search operations.
// The specified index type will be used.
// The specified kvstore implementation will be used
// and the provided kvconfig will be passed to its
// constructor. Note that currently the values of kvconfig must
// be able to be marshaled and unmarshaled using the encoding/json library (used
// when reading/writing the index metadata file).
func NewUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (Index, error) {
	return newIndexUsing(path, mapping, indexType, kvstore, kvconfig)
}
// Open opens the index at the specified path, which must exist.
// The mapping used when it was created will be used for all Index/Search operations.
// No runtime configuration overrides are applied; use OpenUsing to supply them.
func Open(path string) (Index, error) {
	return openIndexUsing(path, nil)
}
// OpenUsing opens the index at the specified path, which must exist.
// The mapping used when it was created will be used for all Index/Search operations.
// The provided runtimeConfig can override settings
// persisted when the kvstore was created.
// If runtimeConfig has an updated mapping, then an index update is attempted.
// An error is returned, without any changes to the index, if an unupdatable
// mapping is provided.
func OpenUsing(path string, runtimeConfig map[string]interface{}) (Index, error) {
	return openIndexUsing(path, runtimeConfig)
}
// Builder is a limited interface, used to build indexes in an offline mode.
// Items cannot be updated or deleted, and the caller MUST ensure a document is
// indexed only once.
type Builder interface {
	// Index adds data to the index being built under the given id.
	// The caller must not index the same id more than once.
	Index(id string, data interface{}) error
	// Close ends the build.
	// NOTE(review): presumably this finalizes the on-disk index — confirm
	// against the builder implementation.
	Close() error
}
// NewBuilder creates a builder, which will build an index at the specified path,
// using the specified mapping and options. The config map carries the options
// and is passed through unchanged.
func NewBuilder(path string, mapping mapping.IndexMapping, config map[string]interface{}) (Builder, error) {
	return newBuilder(path, mapping, config)
}
// IndexCopyable is an index which supports an online copy operation
// of the index.
type IndexCopyable interface {
	// CopyTo creates a fully functional copy of the index at the
	// specified destination directory implementation.
	// It returns an error if the copy could not be made.
	CopyTo(d index.Directory) error
}
// FileSystemDirectory is the default implementation for the
// index.Directory interface.
// NOTE(review): the string value is presumably the destination directory
// path on the local file system — confirm against its methods.
type FileSystemDirectory string
// SynonymDefinition represents a synonym mapping in Bleve.
// Each instance associates one or more input terms with a list of synonyms,
// defining how terms are treated as equivalent in searches.
type SynonymDefinition struct {
	// Input is an optional list of terms for unidirectional synonym mapping.
	// When terms are specified in Input, they will map to the terms in Synonyms,
	// making the relationship unidirectional (each Input maps to all Synonyms).
	// If Input is omitted, the relationship is bidirectional among all Synonyms.
	Input []string `json:"input,omitempty"`

	// Synonyms is a list of terms that are considered equivalent.
	// If Input is specified, each term in Input will map to each term in Synonyms.
	// If Input is not specified, the Synonyms list will be treated bidirectionally,
	// meaning each term in Synonyms is treated as synonymous with all others.
	Synonyms []string `json:"synonyms"`
}

// Validate reports whether the definition is usable. It returns an error when
// the receiver is nil or when Synonyms is empty, and nil otherwise. Input is
// optional and is not checked.
//
// The nil check lets callers that hold a possibly-nil *SynonymDefinition get
// an error back instead of a nil pointer dereference.
func (sd *SynonymDefinition) Validate() error {
	if sd == nil {
		return fmt.Errorf("synonym definition must not be nil")
	}
	if len(sd.Synonyms) == 0 {
		return fmt.Errorf("synonym definition must have at least one synonym")
	}
	return nil
}
// SynonymIndex supports indexing synonym definitions alongside regular documents.
// Synonyms, grouped by collection name, define term relationships for query expansion in searches.
type SynonymIndex interface {
	Index
	// IndexSynonym indexes a synonym definition, with the specified id and
	// belonging to the specified collection.
	IndexSynonym(id string, collection string, definition *SynonymDefinition) error
}
// InsightsIndex is an Index that additionally exposes per-field insight
// queries: term frequencies and centroid cardinalities.
type InsightsIndex interface {
	Index
	// TermFrequencies returns the tokens ordered by frequencies for the field index.
	// At most limit entries are requested; descending selects the sort direction.
	TermFrequencies(field string, limit int, descending bool) ([]index.TermFreq, error)
	// CentroidCardinalities returns the centroids (clusters) from IVF indexes ordered by data density.
	// At most limit entries are requested; descending selects the sort direction.
	CentroidCardinalities(field string, limit int, descending bool) ([]index.CentroidCardinality, error)
}
================================================
FILE: index_alias.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
// An IndexAlias is a wrapper around one or more
// Index objects. It has two distinct modes of
// operation.
// 1. When it points to a single index, ALL index
// operations are valid and will be passed through
// to the underlying index.
// 2. When it points to more than one index, the only
// valid operation is Search. In this case the
// search will be performed across all the
// underlying indexes and the results merged.
// Calls to Add/Remove/Swap the underlying indexes
// are atomic, so you can safely change the
// underlying Index objects while other components
// are performing operations.
type IndexAlias interface {
	Index
	// Add adds the given indexes to the alias.
	Add(i ...Index)
	// Remove removes the given indexes from the alias.
	Remove(i ...Index)
	// Swap adds the indexes in `in` and removes the indexes in `out`
	// as a single atomic change.
	Swap(in, out []Index)
}
================================================
FILE: index_alias_impl.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"fmt"
"sort"
"sync"
"time"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/collector"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
)
// indexAliasImpl is the IndexAlias implementation used by this package.
type indexAliasImpl struct {
	// name is the value reported by Name(); NewIndexAlias sets it to "alias"
	name string
	// indexes are the Index objects this alias currently points to
	indexes []Index
	// mutex is taken by the methods that read or modify indexes, open
	// and mapping
	mutex sync.RWMutex
	// open is true until Close is called
	open bool
	// if all the indexes in that alias have the same mapping
	// then the user can set the mapping here to avoid
	// checking the mapping of each index in the alias
	mapping mapping.IndexMapping
}
// NewIndexAlias builds an IndexAlias that wraps the supplied Index
// objects. The returned alias is open and carries the name "alias".
func NewIndexAlias(indexes ...Index) *indexAliasImpl {
	alias := new(indexAliasImpl)
	alias.name = "alias"
	alias.indexes = indexes
	alias.open = true
	return alias
}
// VisitIndexes invokes the visit callback on every
// index included in the index alias.
//
// The alias is read-locked for the duration of the walk, so the callback
// must not call methods that take the write lock (Add, Remove, Swap,
// Close, SetIndexMapping) on the same alias.
func (i *indexAliasImpl) VisitIndexes(visit func(Index)) {
	i.mutex.RLock()
	// deferred so that the lock is released even if visit panics
	defer i.mutex.RUnlock()
	for _, idx := range i.indexes {
		visit(idx)
	}
}
// isAliasToSingleIndex returns nil when the alias points to exactly one
// index, ErrorAliasEmpty when it points to none and ErrorAliasMulti when
// it points to more than one. It does not take the mutex itself.
func (i *indexAliasImpl) isAliasToSingleIndex() error {
	switch count := len(i.indexes); {
	case count < 1:
		return ErrorAliasEmpty
	case count > 1:
		return ErrorAliasMulti
	default:
		return nil
	}
}
// Index forwards the document to the single index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Index(id string, data interface{}) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return err
	}

	target := i.indexes[0]
	return target.Index(id, data)
}
// IndexSynonym forwards the synonym definition to the single index behind
// the alias. It fails when the alias is closed, when it does not point to
// exactly one index, or (with ErrorSynonymSearchNotSupported) when that
// index is not a SynonymIndex.
func (i *indexAliasImpl) IndexSynonym(id string, collection string, definition *SynonymDefinition) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return err
	}

	si, ok := i.indexes[0].(SynonymIndex)
	if !ok {
		return ErrorSynonymSearchNotSupported
	}
	return si.IndexSynonym(id, collection, definition)
}
// Delete forwards the deletion to the single index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Delete(id string) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	var err error
	if !i.open {
		err = ErrorIndexClosed
	} else {
		err = i.isAliasToSingleIndex()
	}
	if err != nil {
		return err
	}
	return i.indexes[0].Delete(id)
}
// Batch forwards the batch to the single index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Batch(b *Batch) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return err
	}

	target := i.indexes[0]
	return target.Batch(b)
}
// Document looks the document up in the single index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Document(id string) (index.Document, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return nil, err
	}
	return i.indexes[0].Document(id)
}
// DocCount sums the document counts of all indexes behind the alias.
// Indexes whose count cannot be obtained are skipped, so the result may
// be partial. It fails with ErrorIndexClosed when the alias is closed.
func (i *indexAliasImpl) DocCount() (uint64, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return 0, ErrorIndexClosed
	}

	var total uint64
	for _, child := range i.indexes {
		// tolerate errors to produce partial counts
		if count, err := child.DocCount(); err == nil {
			total += count
		}
	}
	return total, nil
}
// Search runs the request through SearchInContext using a background
// context.
func (i *indexAliasImpl) Search(req *SearchRequest) (*SearchResult, error) {
	ctx := context.Background()
	return i.SearchInContext(ctx, req)
}
// SearchInContext executes req against the indexes behind the alias.
//
// It fails with ErrorIndexClosed when the alias is closed and with
// ErrorAliasEmpty when the alias has no indexes. When ctx carries
// search.PreSearchKey only the preSearch data is gathered. Otherwise a
// single underlying index is searched directly, while multiple indexes
// go through an optional preSearch phase followed by MultiSearch.
// The alias stays read-locked for the whole call.
func (i *indexAliasImpl) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()
	if !i.open {
		return nil, ErrorIndexClosed
	}
	if len(i.indexes) < 1 {
		return nil, ErrorAliasEmpty
	}
	if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
		// since preSearchKey is set, it means that the request
		// is being executed as part of a preSearch, which
		// indicates that this index alias is set as an Index
		// in another alias, so we need to do a preSearch search
		// and NOT a real search
		bm25PreSearch := isBM25Enabled(i.mapping)
		flags := &preSearchFlags{
			knn:      requestHasKNN(req),
			synonyms: !isMatchNoneQuery(req.Query),
			bm25:     bm25PreSearch,
		}
		return preSearchDataSearch(ctx, req, flags, i.indexes...)
	}
	// at this point we know we are doing a real search
	// either after a preSearch is done, or directly
	// on the alias
	// check if request has preSearchData which would indicate that the
	// request has already been preSearched and we can skip the
	// preSearch step now, we call an optional function to
	// redistribute the preSearchData to the individual indexes
	// if necessary
	var preSearchData map[string]map[string]interface{}
	if req.PreSearchData != nil {
		var err error
		preSearchData, err = redistributePreSearchData(req, i.indexes)
		if err != nil {
			return nil, err
		}
	}
	// short circuit the simple case
	if len(i.indexes) == 1 {
		if preSearchData != nil {
			// note that this overwrites PreSearchData on the caller's request
			req.PreSearchData = preSearchData[i.indexes[0].Name()]
		}
		return i.indexes[0].SearchInContext(ctx, req)
	}
	// rescorer will be set if score fusion is supposed to happen
	// at this alias (root alias), else will be nil
	var rescorer *rescorer
	if _, ok := ctx.Value(search.ScoreFusionKey).(bool); !ok {
		// new context will be used in internal functions to collect data
		// as suitable for fusion. Rescorer is used for rescoring
		// using fusion algorithms.
		if IsScoreFusionRequested(req) {
			ctx = context.WithValue(ctx, search.ScoreFusionKey, true)
			rescorer = newRescorer(req)
			rescorer.prepareSearchRequest()
			defer rescorer.restoreSearchRequest()
		}
	}
	// at this stage we know we have multiple indexes
	// check if preSearchData needs to be gathered from all indexes
	// before executing the query
	var err error
	// only perform preSearch if
	// - the request does not already have preSearchData
	// - the request requires preSearch
	var preSearchDuration time.Duration
	var sr *SearchResult
	// fusionKnnHits stores the KnnHits at the root alias.
	// This is used with score fusion in case there is no need to
	// send the knn hits to the leaf indexes in search phase.
	// Refer to constructPreSearchDataAndFusionKnnHits for more info.
	// This variable is left nil if we have to send the knn hits to leaf
	// indexes again, else contains the knn hits if not required.
	var fusionKnnHits search.DocumentMatchCollection
	flags, err := preSearchRequired(ctx, req, i.mapping)
	if err != nil {
		return nil, err
	}
	if req.PreSearchData == nil && flags != nil {
		searchStart := time.Now()
		// err is deliberately scoped to this block; every use below is
		// checked before the block ends
		preSearchResult, err := preSearch(ctx, req, flags, i.indexes...)
		if err != nil {
			return nil, err
		}
		// check if the preSearch result has any errors and if so
		// return the search result as is without executing the query
		// so that the errors are not lost
		if preSearchResult.Status.Failed > 0 || len(preSearchResult.Status.Errors) > 0 {
			return preSearchResult, nil
		}
		// finalize the preSearch result now
		finalizePreSearchResult(req, flags, preSearchResult)
		// if there are no errors, then merge the data in the preSearch result
		// and construct the preSearchData to be used in the actual search
		// if the request is satisfied by the preSearch result, then we can
		// directly return the preSearch result as the final result
		if requestSatisfiedByPreSearch(req, flags) {
			sr = finalizeSearchResult(ctx, req, preSearchResult, rescorer)
			// no need to run the 2nd phase MultiSearch(..)
		} else {
			preSearchData, fusionKnnHits, err = constructPreSearchDataAndFusionKnnHits(req, flags, preSearchResult, rescorer, i.indexes)
			if err != nil {
				return nil, err
			}
		}
		preSearchDuration = time.Since(searchStart)
	}
	// check if search result was generated as part of preSearch itself
	if sr == nil {
		multiSearchParams := &multiSearchParams{preSearchData, rescorer, fusionKnnHits}
		sr, err = MultiSearch(ctx, req, multiSearchParams, i.indexes...)
		if err != nil {
			return nil, err
		}
	}
	// account for the time spent in the preSearch phase, if any
	sr.Took += preSearchDuration
	return sr, nil
}
// Fields returns the field names of the single index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Fields() ([]string, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return nil, err
	}

	target := i.indexes[0]
	return target.Fields()
}
// FieldDict opens the dictionary of field on the single index behind the
// alias.
//
// On success the read lock taken here is NOT released: it is handed over
// to the returned dictionary, whose Close releases it. On every failure
// path the lock is released before returning.
func (i *indexAliasImpl) FieldDict(field string) (index.FieldDict, error) {
	i.mutex.RLock()

	var (
		dict index.FieldDict
		err  error
	)
	if !i.open {
		err = ErrorIndexClosed
	} else if err = i.isAliasToSingleIndex(); err == nil {
		dict, err = i.indexes[0].FieldDict(field)
	}
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	wrapped := &indexAliasImplFieldDict{
		index:     i,
		fieldDict: dict,
	}
	return wrapped, nil
}
// FieldDictRange opens the dictionary of field, restricted to the given
// term range, on the single index behind the alias.
//
// On success the read lock taken here is NOT released: it is handed over
// to the returned dictionary, whose Close releases it. On every failure
// path the lock is released before returning.
func (i *indexAliasImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	var (
		dict index.FieldDict
		err  error
	)
	if !i.open {
		err = ErrorIndexClosed
	} else if err = i.isAliasToSingleIndex(); err == nil {
		dict, err = i.indexes[0].FieldDictRange(field, startTerm, endTerm)
	}
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	wrapped := &indexAliasImplFieldDict{
		index:     i,
		fieldDict: dict,
	}
	return wrapped, nil
}
// FieldDictPrefix opens the dictionary of field, restricted to terms with
// the given prefix, on the single index behind the alias.
//
// On success the read lock taken here is NOT released: it is handed over
// to the returned dictionary, whose Close releases it. On every failure
// path the lock is released before returning.
func (i *indexAliasImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	var (
		dict index.FieldDict
		err  error
	)
	if !i.open {
		err = ErrorIndexClosed
	} else if err = i.isAliasToSingleIndex(); err == nil {
		dict, err = i.indexes[0].FieldDictPrefix(field, termPrefix)
	}
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	wrapped := &indexAliasImplFieldDict{
		index:     i,
		fieldDict: dict,
	}
	return wrapped, nil
}
// Close marks the alias as closed. The underlying indexes are left
// untouched. It always returns nil.
func (i *indexAliasImpl) Close() error {
	i.mutex.Lock()
	i.open = false
	i.mutex.Unlock()
	return nil
}
// SetIndexMapping records m as the mapping shared by every index in the
// alias. Use it ONLY when all the indexes in the alias really have the
// same mapping; it spares the per-index mapping lookup when a search
// request is executed.
// It fails with ErrorIndexClosed when the alias is closed.
func (i *indexAliasImpl) SetIndexMapping(m mapping.IndexMapping) error {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	if i.open {
		i.mapping = m
		return nil
	}
	return ErrorIndexClosed
}
// Mapping returns the mapping set through SetIndexMapping if there is
// one, otherwise the mapping of the single index behind the alias.
// It returns nil when the alias is closed, or when no mapping was set and
// the alias does not point to exactly one index.
func (i *indexAliasImpl) Mapping() mapping.IndexMapping {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	switch {
	case !i.open:
		return nil
	case i.mapping != nil:
		// an explicitly set mapping takes precedence
		return i.mapping
	case i.isAliasToSingleIndex() != nil:
		return nil
	}
	return i.indexes[0].Mapping()
}
// Stats returns the stats of the single index behind the alias, or nil
// when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Stats() *IndexStat {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open || i.isAliasToSingleIndex() != nil {
		return nil
	}
	return i.indexes[0].Stats()
}
// StatsMap returns the stats map of the single index behind the alias, or
// nil when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) StatsMap() map[string]interface{} {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open || i.isAliasToSingleIndex() != nil {
		return nil
	}
	return i.indexes[0].StatsMap()
}
// GetInternal reads the internal value stored under key from the single
// index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) GetInternal(key []byte) ([]byte, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return nil, err
	}

	target := i.indexes[0]
	return target.GetInternal(key)
}
// SetInternal stores val under key as an internal value of the single
// index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) SetInternal(key, val []byte) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	var err error
	if !i.open {
		err = ErrorIndexClosed
	} else {
		err = i.isAliasToSingleIndex()
	}
	if err != nil {
		return err
	}
	return i.indexes[0].SetInternal(key, val)
}
// DeleteInternal removes the internal value stored under key from the
// single index behind the alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) DeleteInternal(key []byte) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return err
	}

	target := i.indexes[0]
	return target.DeleteInternal(key)
}
// Advanced exposes the low-level index of the single index behind the
// alias.
// It fails when the alias is closed or does not point to exactly one index.
func (i *indexAliasImpl) Advanced() (index.Index, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}
	if err := i.isAliasToSingleIndex(); err != nil {
		return nil, err
	}
	return i.indexes[0].Advanced()
}
// Add appends the given indexes to the alias while holding the write
// lock.
func (i *indexAliasImpl) Add(indexes ...Index) {
	i.mutex.Lock()
	i.indexes = append(i.indexes, indexes...)
	i.mutex.Unlock()
}
// removeSingle removes the first occurrence of index from the alias and
// is a no-op when the index is not part of it.
//
// The callers in this file (Remove and Swap) hold the write lock while
// calling it; removeSingle does not lock by itself.
//
// The surviving entries are copied into a fresh slice instead of being
// shifted in place: the backing array may be shared with the slice the
// caller handed to NewIndexAlias, and an in-place shift would both
// rewrite that slice and keep a stale reference to the last element alive
// in the unused tail of the array.
func (i *indexAliasImpl) removeSingle(index Index) {
	for pos, in := range i.indexes {
		if in != index {
			continue
		}
		remaining := make([]Index, 0, len(i.indexes)-1)
		remaining = append(remaining, i.indexes[:pos]...)
		remaining = append(remaining, i.indexes[pos+1:]...)
		i.indexes = remaining
		return
	}
}
// Remove drops each of the given indexes from the alias while holding
// the write lock. Indexes that are not part of the alias are ignored.
func (i *indexAliasImpl) Remove(indexes ...Index) {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	for pos := range indexes {
		i.removeSingle(indexes[pos])
	}
}
// Swap adds the indexes in `in` and then removes the indexes in `out`,
// all under a single acquisition of the write lock.
func (i *indexAliasImpl) Swap(in, out []Index) {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	// additions first
	i.indexes = append(i.indexes, in...)

	// then removals
	for pos := range out {
		i.removeSingle(out[pos])
	}
}
// createChildSearchRequest derives a separate request from the original
// one, carrying the given preSearchData, so that the child searches do
// not race on the req structure.
// TODO disable highlight/field load on child
// requests, and add code to do this only on
// the actual final results.
// Perhaps that part needs to be optional,
// could be slower in remote usages.
func createChildSearchRequest(req *SearchRequest, preSearchData map[string]interface{}) *SearchRequest {
	childReq := copySearchRequest(req, preSearchData)
	return childReq
}
// asyncSearchResult carries the outcome of searching one child index
// from its goroutine back to the goroutine that merges the results.
type asyncSearchResult struct {
	// Name is the name of the index that was searched
	Name string
	// Result is the value returned by the child's SearchInContext call
	Result *SearchResult
	// Err is the error returned by the child's SearchInContext call
	Err error
}
// preSearchFlags is a struct to hold flags indicating why preSearch is required
type preSearchFlags struct {
	// knn is set when the request has a KNN part
	knn bool
	// synonyms is set when preSearch has to gather synonym data
	synonyms bool
	// bm25 is set when preSearch has to gather BM25 statistics
	bm25 bool // needs presearch for this too
}
// isBM25Enabled reports whether m is a *mapping.IndexMappingImpl whose
// scoring model is BM25. Any other mapping type yields false.
func isBM25Enabled(m mapping.IndexMapping) bool {
	impl, ok := m.(*mapping.IndexMappingImpl)
	if !ok {
		return false
	}
	return impl.ScoringModel == index.BM25Scoring
}
// preSearchRequired checks if preSearch is required and returns the presearch flags struct
// indicating which preSearch is required.
//
// The flags are derived as follows:
//   - knn: the request has a KNN part
//   - synonyms: the query is not a match none query, the mapping defines
//     synonyms and at least one queried field has a synonym source
//   - bm25: the query is not a match none query, ctx requests global
//     scoring through search.SearchTypeKey and the mapping uses BM25
//
// It returns nil flags when no preSearch is needed, and an error when the
// fields of the query cannot be extracted.
func preSearchRequired(ctx context.Context, req *SearchRequest, m mapping.IndexMapping) (*preSearchFlags, error) {
	// Check for KNN query
	knn := requestHasKNN(req)

	var synonyms, bm25 bool
	if !isMatchNoneQuery(req.Query) {
		// Check if synonyms are defined in the mapping
		if sm, ok := m.(mapping.SynonymMapping); ok && sm.SynonymCount() > 0 {
			// check if any of the fields queried have a synonym source
			// in the index mapping, to prevent unnecessary preSearch
			fs, err := query.ExtractFields(req.Query, m, nil)
			if err != nil {
				return nil, err
			}
			for field := range fs {
				if sm.SynonymSourceForPath(field) != "" {
					synonyms = true
					break
				}
			}
		}

		if ctx != nil {
			// comma-ok assertion: a value of an unexpected type under
			// SearchTypeKey is treated as "not global scoring" instead
			// of panicking
			if searchType, ok := ctx.Value(search.SearchTypeKey).(string); ok && searchType == search.GlobalScoring {
				bm25 = isBM25Enabled(m)
			}
		}
	}

	if knn || synonyms || bm25 {
		return &preSearchFlags{
			knn:      knn,
			synonyms: synonyms,
			bm25:     bm25,
		}, nil
	}
	return nil, nil
}
// preSearch runs the preSearch phase for req on the given indexes.
//
// The request sent to the indexes keeps the original query when bm25 or
// synonym data has to be gathered; otherwise it uses a match none query,
// since only the preSearchData is of interest. The KNN part of req is
// added when flags.knn is set, and the context is tagged with
// search.PreSearchKey.
func preSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
	preSearchReq := &SearchRequest{
		Query: req.Query,
	}
	if !(flags.bm25 || flags.synonyms) {
		// nothing in this preSearch depends on the real query
		preSearchReq.Query = query.NewMatchNoneQuery()
	}

	preSearchCtx := context.WithValue(ctx, search.PreSearchKey, true)
	if flags.knn {
		addKnnToDummyRequest(preSearchReq, req)
	}
	return preSearchDataSearch(preSearchCtx, preSearchReq, flags, indexes...)
}
// finalizeSearchResult turns a preSearch result into the final search
// result for requests that are satisfied by just the preSearch result,
// so that no multi search has to be performed.
//
// It recomputes Total, MaxScore and the hit numbers over all hits, clears
// the per-hit IndexNames, applies SearchBefore/SearchAfter filtering,
// optional rescoring and From/Size pagination. preSearchResult is
// modified in place and returned; a nil preSearchResult yields nil.
// ctx is not used by this function.
func finalizeSearchResult(ctx context.Context, req *SearchRequest, preSearchResult *SearchResult, rescorer *rescorer) *SearchResult {
	if preSearchResult == nil {
		return nil
	}
	// global values across all hits irrespective of pagination settings
	preSearchResult.Total = uint64(preSearchResult.Hits.Len())
	maxScore := float64(0)
	for i, hit := range preSearchResult.Hits {
		// since we are now using the preSearch result as the final result
		// we can discard the indexNames from the hits as they are no longer
		// relevant.
		hit.IndexNames = nil
		if hit.Score > maxScore {
			maxScore = hit.Score
		}
		hit.HitNumber = uint64(i)
	}
	preSearchResult.MaxScore = maxScore
	// now apply pagination settings
	var reverseQueryExecution bool
	if req.SearchBefore != nil {
		// a SearchBefore request is executed as a SearchAfter request on
		// the reversed sort order; req is temporarily modified for that
		// and put back further below
		reverseQueryExecution = true
		req.Sort.Reverse()
		req.SearchAfter = req.SearchBefore
	}
	if req.SearchAfter != nil {
		preSearchResult.Hits = collector.FilterHitsBySearchAfter(preSearchResult.Hits, req.Sort, req.SearchAfter)
	}
	if rescorer != nil {
		// rescore takes ftsHits and knnHits as first and second argument respectively
		// since this is pure knn, set ftsHits to nil. preSearchResult.Hits contains knn results
		preSearchResult.Hits, preSearchResult.Total, preSearchResult.MaxScore = rescorer.rescore(nil, preSearchResult.Hits)
		rescorer.restoreSearchRequest()
	}
	preSearchResult.Hits = hitsInCurrentPage(req, preSearchResult.Hits)
	if reverseQueryExecution {
		// reverse the sort back to the original
		req.Sort.Reverse()
		// resort using the original order
		mhs := newSearchHitSorter(req.Sort, preSearchResult.Hits)
		req.SortFunc()(mhs)
		// SearchBefore was never cleared above, so only SearchAfter
		// needs to be reset
		req.SearchAfter = nil
	}
	if req.Explain {
		preSearchResult.Request = req
	}
	return preSearchResult
}
// requestSatisfiedByPreSearch reports whether the preSearch result can be
// returned as the final result of req.
//
// This is never the case without flags or when the synonyms flag is set,
// as synonyms are not part of the preSearch result. Otherwise it is the
// case when the knn flag is set and the KNN request is satisfied by the
// preSearch.
func requestSatisfiedByPreSearch(req *SearchRequest, flags *preSearchFlags) bool {
	if flags == nil || flags.synonyms {
		return false
	}
	return flags.knn && isKNNrequestSatisfiedByPreSearch(req)
}
// constructSynonymPreSearchData stores the synonym result of sr under
// search.SynonymPreSearchDataKey in the entry of every index in rv and
// returns rv. rv must already hold a map for each index name.
func constructSynonymPreSearchData(rv map[string]map[string]interface{}, sr *SearchResult, indexes []Index) map[string]map[string]interface{} {
	for pos := range indexes {
		name := indexes[pos].Name()
		rv[name][search.SynonymPreSearchDataKey] = sr.SynonymResult
	}
	return rv
}
// constructBM25PreSearchData stores a copy of the BM25 statistics of sr
// under search.BM25PreSearchDataKey in the entry of every index in rv and
// returns rv. rv is returned unchanged when sr has no BM25 statistics.
// rv must already hold a map for each index name.
func constructBM25PreSearchData(rv map[string]map[string]interface{}, sr *SearchResult, indexes []Index) map[string]map[string]interface{} {
	stats := sr.BM25Stats
	if stats == nil {
		return rv
	}
	for _, idx := range indexes {
		// every index gets its own BM25Stats value
		perIndex := &search.BM25Stats{
			DocCount:         stats.DocCount,
			FieldCardinality: stats.FieldCardinality,
		}
		rv[idx.Name()][search.BM25PreSearchDataKey] = perIndex
	}
	return rv
}
// constructPreSearchData builds, for every index, the preSearchData to
// send along with the real search, keyed by index name.
//
// Which pieces are included is driven by flags (knn, synonyms, bm25).
// It returns an error when flags or preSearchResult is nil, or when the
// KNN data cannot be constructed. req is not used by this function.
func constructPreSearchData(req *SearchRequest, flags *preSearchFlags,
	preSearchResult *SearchResult, indexes []Index,
) (map[string]map[string]interface{}, error) {
	if flags == nil || preSearchResult == nil {
		return nil, fmt.Errorf("invalid input, flags: %v, preSearchResult: %v", flags, preSearchResult)
	}

	// start with an empty payload per index
	perIndex := make(map[string]map[string]interface{}, len(indexes))
	for pos := range indexes {
		perIndex[indexes[pos].Name()] = make(map[string]interface{})
	}

	if flags.knn {
		var err error
		perIndex, err = constructKnnPreSearchData(perIndex, preSearchResult, indexes)
		if err != nil {
			return nil, err
		}
	}
	if flags.synonyms {
		perIndex = constructSynonymPreSearchData(perIndex, preSearchResult, indexes)
	}
	if flags.bm25 {
		perIndex = constructBM25PreSearchData(perIndex, preSearchResult, indexes)
	}
	return perIndex, nil
}
// constructPreSearchDataAndFusionKnnHits builds the preSearchData for the
// search phase and decides where the KNN hits live.
//
// With score fusion (non-nil rescorer) and a KNN preSearch, the KNN hits
// are kept at the alias: they are returned as the second value and are
// removed from preSearchResult so they are not sent to the indexes.
// Otherwise the second value is nil and the hits are included in the
// preSearchData.
func constructPreSearchDataAndFusionKnnHits(req *SearchRequest, flags *preSearchFlags,
	preSearchResult *SearchResult, rescorer *rescorer, indexes []Index,
) (map[string]map[string]interface{}, search.DocumentMatchCollection, error) {
	var hitsKeptAtAlias search.DocumentMatchCollection

	keepAtAlias := rescorer != nil && flags.knn
	if keepAtAlias {
		hitsKeptAtAlias = preSearchResult.Hits
		preSearchResult.Hits = nil
	}

	data, err := constructPreSearchData(req, flags, preSearchResult, indexes)
	if err != nil {
		return nil, nil, err
	}
	return data, hitsKeptAtAlias, nil
}
// preSearchDataSearch runs req as a preSearch on every index concurrently
// and combines the outcomes into a single SearchResult.
//
// Partial results are not allowed: if any index returns an error or any
// merged status reports a failure, the returned result carries the
// errors, has Successful set to 0 and Failed set to Total, and contains
// none of the processed preSearch data. The returned error is always nil.
func preSearchDataSearch(ctx context.Context, req *SearchRequest, flags *preSearchFlags, indexes ...Index) (*SearchResult, error) {
	// buffered for one result per index, so the workers never block on send
	asyncResults := make(chan *asyncSearchResult, len(indexes))
	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup
	searchChildIndex := func(in Index, childReq *SearchRequest) {
		rv := asyncSearchResult{Name: in.Name()}
		rv.Result, rv.Err = in.SearchInContext(ctx, childReq)
		asyncResults <- &rv
		waitGroup.Done()
	}
	waitGroup.Add(len(indexes))
	for _, in := range indexes {
		go searchChildIndex(in, createChildSearchRequest(req, nil))
	}
	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()
	// the final search result to be returned after combining the preSearch results
	var sr *SearchResult
	// the preSearch result processor
	var prp preSearchResultProcessor
	// error map
	indexErrors := make(map[string]error)
	for asr := range asyncResults {
		if asr.Err == nil {
			// a valid preSearch result
			if prp == nil {
				// first valid preSearch result
				// create a new preSearch result processor
				prp = createPreSearchResultProcessor(req, flags)
			}
			prp.add(asr.Result, asr.Name)
			if sr == nil {
				// first result
				sr = &SearchResult{
					Status: asr.Result.Status,
					Cost:   asr.Result.Cost,
				}
			} else {
				// merge with previous
				sr.Status.Merge(asr.Result.Status)
				sr.Cost += asr.Result.Cost
			}
		} else {
			indexErrors[asr.Name] = asr.Err
		}
	}
	// handle case where no results were successful
	if sr == nil {
		sr = &SearchResult{
			Status: &SearchStatus{
				Errors: make(map[string]error),
			},
		}
	}
	// in preSearch, partial results are not allowed as it can lead to
	// the real search giving incorrect results, and hence the search
	// result is not populated with any of the processed data from
	// the preSearch result processor if there are any errors
	// or the preSearch result status has any failures
	if len(indexErrors) > 0 || sr.Status.Failed > 0 {
		if sr.Status.Errors == nil {
			sr.Status.Errors = make(map[string]error)
		}
		for indexName, indexErr := range indexErrors {
			sr.Status.Errors[indexName] = indexErr
			sr.Status.Total++
		}
		// At this point, all errors have been recorded—either from the preSearch phase
		// (via status.Merge) or from individual index search failures (indexErrors).
		// Since partial results are not allowed, mark the entire request as failed.
		sr.Status.Successful = 0
		sr.Status.Failed = sr.Status.Total
	} else {
		// NOTE(review): prp is only created once a child succeeds, so with
		// zero indexes this would call finalize on a nil prp; the callers
		// in this file reject an empty alias beforehand.
		prp.finalize(sr)
	}
	return sr, nil
}
// redistributePreSearchData splits the preSearchData carried by req among
// the given indexes and returns it keyed by index name.
//
// This is needed in an alias tree: the preSearchData arrives at an alias
// and has to be passed on to the indexes/aliases one level below.
//   - KNN hits are validated and handed to the index they belong to
//   - synonym data and BM25 statistics are passed to every index as is
//
// An error is returned when the KNN hits cannot be validated/distributed.
func redistributePreSearchData(req *SearchRequest, indexes []Index) (map[string]map[string]interface{}, error) {
	perIndex := make(map[string]map[string]interface{}, len(indexes))
	for pos := range indexes {
		perIndex[indexes[pos].Name()] = make(map[string]interface{})
	}

	// the preSearchData for KNN is a list of DocumentMatch objects that
	// belong to specific leaf indexes, so at each level of the tree the
	// hits have to be segregated again for the indexes/aliases at that level
	knnHits, hasKnn := req.PreSearchData[search.KnnPreSearchDataKey].([]*search.DocumentMatch)
	if hasKnn {
		segregated, err := validateAndDistributeKNNHits(knnHits, indexes)
		if err != nil {
			return nil, err
		}
		for _, idx := range indexes {
			perIndex[idx.Name()][search.KnnPreSearchDataKey] = segregated[idx.Name()]
		}
	}

	synonymMap, hasSynonyms := req.PreSearchData[search.SynonymPreSearchDataKey].(search.FieldTermSynonymMap)
	if hasSynonyms {
		for _, idx := range indexes {
			perIndex[idx.Name()][search.SynonymPreSearchDataKey] = synonymMap
		}
	}

	bm25Stats, hasBM25 := req.PreSearchData[search.BM25PreSearchDataKey].(*search.BM25Stats)
	if hasBM25 {
		for _, idx := range indexes {
			perIndex[idx.Name()][search.BM25PreSearchDataKey] = bm25Stats
		}
	}
	return perIndex, nil
}
// finalizePreSearchResult applies the finalization steps that the
// preSearch flags call for to preSearchResult, in place. At present only
// the knn flag triggers a step: the hits are passed through
// finalizeKNNResults. Nil flags leave the result untouched.
func finalizePreSearchResult(req *SearchRequest, flags *preSearchFlags, preSearchResult *SearchResult) {
	if flags != nil && flags.knn {
		preSearchResult.Hits = finalizeKNNResults(req, preSearchResult.Hits)
	}
}
// hitsInCurrentPage selects the hits of the page described by the From
// and Size parameters of the request.
//
// When the request has a sort order the hits are first sorted in place by
// it. A From that reaches or exceeds the number of hits yields an empty
// collection; a Size of zero or less does not trim the result.
func hitsInCurrentPage(req *SearchRequest, hits []*search.DocumentMatch) []*search.DocumentMatch {
	sortFunc := req.SortFunc()
	if len(req.Sort) > 0 {
		// order all hits as requested before paging
		sortFunc(newSearchHitSorter(req.Sort, hits))
	}

	// skip the first From hits
	if req.From > 0 {
		if len(hits) > req.From {
			hits = hits[req.From:]
		} else {
			hits = search.DocumentMatchCollection{}
		}
	}

	// keep at most Size hits
	if req.Size > 0 && len(hits) > req.Size {
		hits = hits[:req.Size]
	}
	return hits
}
// Extra parameters for MultiSearch
type multiSearchParams struct {
	// preSearchData holds, per index name, the preSearchData to attach
	// to the child request sent to that index
	preSearchData map[string]map[string]interface{}
	// rescorer, when non-nil, rescores the merged hits
	rescorer *rescorer
	// fusionKnnHits are the KNN hits passed to the rescorer together
	// with the merged hits
	fusionKnnHits search.DocumentMatchCollection
}
// MultiSearch executes a SearchRequest across multiple Index objects,
// then merges the results. The indexes must honor any ctx deadline.
//
// params may be nil. Every index is searched in its own goroutine with a
// child copy of req. Errors of individual indexes do not fail the call:
// they are recorded in the Status of the returned result, and the
// returned error is always nil.
//
// When req.SearchBefore is set, req is temporarily rewritten into a
// SearchAfter request on the reversed sort order and restored before
// returning.
func MultiSearch(ctx context.Context, req *SearchRequest, params *multiSearchParams, indexes ...Index) (*SearchResult, error) {
	searchStart := time.Now()
	// buffered for one result per index, so the workers never block on send
	asyncResults := make(chan *asyncSearchResult, len(indexes))
	var preSearchData map[string]map[string]interface{}
	var rescorer *rescorer
	var fusionKnnHits search.DocumentMatchCollection
	if params != nil {
		preSearchData = params.preSearchData
		rescorer = params.rescorer
		fusionKnnHits = params.fusionKnnHits
	}
	var reverseQueryExecution bool
	if req.SearchBefore != nil {
		reverseQueryExecution = true
		req.Sort.Reverse()
		req.SearchAfter = req.SearchBefore
		req.SearchBefore = nil
	}
	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup
	searchChildIndex := func(in Index, childReq *SearchRequest) {
		rv := asyncSearchResult{Name: in.Name()}
		rv.Result, rv.Err = in.SearchInContext(ctx, childReq)
		asyncResults <- &rv
		waitGroup.Done()
	}
	waitGroup.Add(len(indexes))
	for _, in := range indexes {
		// payload stays nil when there is no preSearchData at all
		var payload map[string]interface{}
		if preSearchData != nil {
			payload = preSearchData[in.Name()]
		}
		go searchChildIndex(in, createChildSearchRequest(req, payload))
	}
	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()
	var sr *SearchResult
	indexErrors := make(map[string]error)
	for asr := range asyncResults {
		if asr.Err == nil {
			if sr == nil {
				// first result
				sr = asr.Result
			} else {
				// merge with previous
				sr.Merge(asr.Result)
			}
		} else {
			indexErrors[asr.Name] = asr.Err
		}
	}
	// merge just concatenated all the hits
	// now lets clean it up
	// handle case where no results were successful
	if sr == nil {
		sr = &SearchResult{
			Status: &SearchStatus{
				Errors: make(map[string]error),
			},
		}
	}
	if rescorer != nil {
		sr.Hits, sr.Total, sr.MaxScore = rescorer.rescore(sr.Hits, fusionKnnHits)
		rescorer.restoreSearchRequest()
	}
	sr.Hits = hitsInCurrentPage(req, sr.Hits)
	// fix up facets
	for name, fr := range req.Facets {
		sr.Facets.Fixup(name, fr.Size)
	}
	if reverseQueryExecution {
		// reverse the sort back to the original
		req.Sort.Reverse()
		// resort using the original order
		mhs := newSearchHitSorter(req.Sort, sr.Hits)
		req.SortFunc()(mhs)
		// reset request
		req.SearchBefore = req.SearchAfter
		req.SearchAfter = nil
	}
	// fix up original request
	if req.Explain {
		sr.Request = req
	}
	searchDuration := time.Since(searchStart)
	sr.Took = searchDuration
	// fix up errors
	if len(indexErrors) > 0 {
		if sr.Status.Errors == nil {
			sr.Status.Errors = make(map[string]error)
		}
		// every failed index counts towards both Total and Failed
		for indexName, indexErr := range indexErrors {
			sr.Status.Errors[indexName] = indexErr
			sr.Status.Total++
			sr.Status.Failed++
		}
	}
	return sr, nil
}
// NewBatch creates a batch on the single index behind the alias, or
// returns nil when the alias is closed or does not point to exactly one
// index.
func (i *indexAliasImpl) NewBatch() *Batch {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open || i.isAliasToSingleIndex() != nil {
		return nil
	}
	return i.indexes[0].NewBatch()
}
// Name returns the name of the alias. The name is read without taking
// the mutex.
func (i *indexAliasImpl) Name() string {
	return i.name
}
// SetName changes the name of the alias. The name is written without
// taking the mutex.
func (i *indexAliasImpl) SetName(name string) {
	i.name = name
}
// indexAliasImplFieldDict wraps the field dictionary of the underlying
// index together with the alias it was obtained from, so that closing
// the dictionary can release the read lock held on that alias.
type indexAliasImplFieldDict struct {
	// index is the alias whose read lock is held on behalf of this dictionary
	index *indexAliasImpl
	// fieldDict is the wrapped dictionary; all reads are delegated to it
	fieldDict index.FieldDict
}
// BytesRead delegates to the wrapped field dictionary.
func (f *indexAliasImplFieldDict) BytesRead() uint64 {
	return f.fieldDict.BytesRead()
}
// Next delegates to the wrapped field dictionary.
func (f *indexAliasImplFieldDict) Next() (*index.DictEntry, error) {
	return f.fieldDict.Next()
}
// Close closes the wrapped field dictionary and then releases the read
// lock on the alias, whether or not closing the dictionary succeeded.
// It returns the error of the wrapped dictionary's Close.
func (f *indexAliasImplFieldDict) Close() error {
	// the RUnlock runs after the wrapped Close below has returned
	defer f.index.mutex.RUnlock()
	return f.fieldDict.Close()
}
// Cardinality delegates to the wrapped field dictionary.
func (f *indexAliasImplFieldDict) Cardinality() int {
	return f.fieldDict.Cardinality()
}
// -----------------------------------------------------------------------------
// TermFrequencies returns up to limit terms of field together with their
// frequencies, ordered by frequency (descending or ascending).
//
// It fails with ErrorIndexClosed when the alias is closed and with
// ErrorAliasEmpty when it has no indexes. With a single index the call is
// passed through; an index that is not an InsightsIndex yields nil, nil.
// With several indexes every index is queried concurrently, the
// frequencies of equal terms are summed, and the merged list is sorted
// and trimmed to limit. Indexes that are not an InsightsIndex or that
// return an error contribute nothing.
func (i *indexAliasImpl) TermFrequencies(field string, limit int, descending bool) (
	[]index.TermFreq, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()
	if !i.open {
		return nil, ErrorIndexClosed
	}
	if len(i.indexes) < 1 {
		return nil, ErrorAliasEmpty
	}
	// short circuit the simple case
	if len(i.indexes) == 1 {
		if idx, ok := i.indexes[0].(InsightsIndex); ok {
			return idx.TermFrequencies(field, limit, descending)
		}
		return nil, nil
	}
	// run search on each index in separate go routine
	var waitGroup sync.WaitGroup
	// buffered for one result per index, so the workers never block on send
	asyncResults := make(chan []index.TermFreq, len(i.indexes))
	searchChildIndex := func(in Index, field string, limit int, descending bool) {
		var rv []index.TermFreq
		if idx, ok := in.(InsightsIndex); ok {
			// over sample for higher accuracy
			// the error is dropped, leaving this index out of the merge
			rv, _ = idx.TermFrequencies(field, limit*5, descending)
		}
		asyncResults <- rv
		waitGroup.Done()
	}
	waitGroup.Add(len(i.indexes))
	for _, in := range i.indexes {
		go searchChildIndex(in, field, limit, descending)
	}
	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()
	// sum up the frequencies reported for the same term by different indexes
	rvTermFreqsMap := make(map[string]uint64)
	for asr := range asyncResults {
		for _, entry := range asr {
			rvTermFreqsMap[entry.Term] += entry.Frequency
		}
	}
	rvTermFreqs := make([]index.TermFreq, 0, len(rvTermFreqsMap))
	for term, freq := range rvTermFreqsMap {
		rvTermFreqs = append(rvTermFreqs, index.TermFreq{
			Term:      term,
			Frequency: freq,
		})
	}
	sort.Slice(rvTermFreqs, func(i, j int) bool {
		if rvTermFreqs[i].Frequency == rvTermFreqs[j].Frequency {
			// If frequencies are equal, sort by term lexicographically
			return rvTermFreqs[i].Term < rvTermFreqs[j].Term
		}
		if descending {
			return rvTermFreqs[i].Frequency > rvTermFreqs[j].Frequency
		}
		return rvTermFreqs[i].Frequency < rvTermFreqs[j].Frequency
	})
	// NOTE(review): only an upper clamp is applied, so a negative limit
	// would make the slice expression below panic - confirm that callers
	// never pass one.
	if limit > len(rvTermFreqs) {
		limit = len(rvTermFreqs)
	}
	return rvTermFreqs[:limit], nil
}
// CentroidCardinalities returns up to limit centroid-cardinality entries for
// field, ordered by Cardinality (largest first when descending is true,
// smallest first otherwise).
//
// It returns ErrorIndexClosed when the alias is closed and ErrorAliasEmpty
// when the alias has no member indexes. With exactly one member the call is
// delegated to that member if it implements InsightsIndex; otherwise
// (nil, nil) is returned.
//
// With several members every member is queried concurrently and the entries
// they return are concatenated, sorted and truncated to limit. Members that do
// not implement InsightsIndex, or whose query fails, contribute nothing: the
// merge is best effort and per-member errors are deliberately not reported.
// In this path a negative limit yields an empty result instead of a panic,
// and the merge buffer is sized from the data actually returned rather than
// from the caller-supplied limit.
func (i *indexAliasImpl) CentroidCardinalities(field string, limit int, descending bool) (
	[]index.CentroidCardinality, error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	if len(i.indexes) < 1 {
		return nil, ErrorAliasEmpty
	}

	// short circuit the simple case
	if len(i.indexes) == 1 {
		if idx, ok := i.indexes[0].(InsightsIndex); ok {
			return idx.CentroidCardinalities(field, limit, descending)
		}
		return nil, nil
	}

	// run search on each index in separate go routine; the channel is
	// buffered with one slot per member so no sender can ever block
	var waitGroup sync.WaitGroup
	asyncResults := make(chan []index.CentroidCardinality, len(i.indexes))

	searchChildIndex := func(in Index, field string, limit int, descending bool) {
		defer waitGroup.Done()
		var rv []index.CentroidCardinality
		if idx, ok := in.(InsightsIndex); ok {
			// a failing member is intentionally treated as "no results"
			rv, _ = idx.CentroidCardinalities(field, limit, descending)
		}
		asyncResults <- rv
	}

	waitGroup.Add(len(i.indexes))
	for _, in := range i.indexes {
		go searchChildIndex(in, field, limit, descending)
	}

	// on another go routine, close after finished
	go func() {
		waitGroup.Wait()
		close(asyncResults)
	}()

	// gather the per-member slices first so the merged slice can be sized
	// exactly, independent of the caller-supplied limit
	perIndex := make([][]index.CentroidCardinality, 0, len(i.indexes))
	total := 0
	for asr := range asyncResults {
		perIndex = append(perIndex, asr)
		total += len(asr)
	}

	rvCentroidCardinalities := make([]index.CentroidCardinality, 0, total)
	for _, part := range perIndex {
		rvCentroidCardinalities = append(rvCentroidCardinalities, part...)
	}

	sort.Slice(rvCentroidCardinalities, func(a, b int) bool {
		if descending {
			return rvCentroidCardinalities[a].Cardinality > rvCentroidCardinalities[b].Cardinality
		}
		return rvCentroidCardinalities[a].Cardinality < rvCentroidCardinalities[b].Cardinality
	})

	// clamp limit into [0, len] so the slice expression below cannot panic
	if limit < 0 {
		limit = 0
	}
	if limit > len(rvCentroidCardinalities) {
		limit = len(rvCentroidCardinalities)
	}

	return rvCentroidCardinalities[:limit], nil
}
================================================
FILE: index_alias_impl_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"fmt"
"reflect"
"testing"
"time"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestIndexAliasSingle checks that an alias over exactly one index forwards
// every operation to that index, and keeps doing so after the member is
// replaced via Add/Remove and via Swap. Each stub member returns a distinct
// error so the test can tell which member actually served the call.
func TestIndexAliasSingle(t *testing.T) {
	expectedError := fmt.Errorf("expected")
	ei1 := &stubIndex{
		err: expectedError,
	}

	alias := NewIndexAlias(ei1)
	batch := alias.NewBatch()
	sr := NewSearchRequest(NewTermQuery("test"))

	// verifyMember asserts that the alias currently delegates to a member
	// whose configured error is expected.
	verifyMember := func(expected error) {
		t.Helper()

		// operations that must surface the member's error, in call order
		ops := []func() error{
			func() error { return alias.Index("a", "a") },
			func() error { return alias.Delete("a") },
			func() error { return alias.Batch(batch) },
			func() error { _, err := alias.Document("a"); return err },
			func() error { _, err := alias.Fields(); return err },
			func() error { _, err := alias.GetInternal([]byte("a")); return err },
			func() error { return alias.SetInternal([]byte("a"), []byte("a")) },
			func() error { return alias.DeleteInternal([]byte("a")) },
		}
		for _, op := range ops {
			if err := op(); err != expected {
				t.Errorf("expected %v, got %v", expected, err)
			}
		}

		// the stub has neither a mapping nor stats
		if m := alias.Mapping(); m != nil {
			t.Errorf("expected nil, got %v", m)
		}
		if indexStat := alias.Stats(); indexStat != nil {
			t.Errorf("expected nil, got %v", indexStat)
		}

		// the stub has no search result configured, so Search surfaces its error
		if _, err := alias.Search(sr); err != expected {
			t.Errorf("expected %v, got %v", expected, err)
		}

		// DocCount is expected to succeed with a zero count even though the
		// stub's DocCount returns its error
		count, err := alias.DocCount()
		if err != nil {
			t.Errorf("expected no error, got %v", err)
		}
		if count != 0 {
			t.Errorf("expected count 0, got %d", count)
		}
	}

	verifyMember(expectedError)

	// now change the def using add/remove
	expectedError2 := fmt.Errorf("expected2")
	ei2 := &stubIndex{
		err: expectedError2,
	}
	alias.Add(ei2)
	alias.Remove(ei1)
	verifyMember(expectedError2)

	// now change the def using swap
	expectedError3 := fmt.Errorf("expected3")
	ei3 := &stubIndex{
		err: expectedError3,
	}
	alias.Swap([]Index{ei3}, []Index{ei2})
	verifyMember(expectedError3)
}
// TestIndexAliasClosed verifies that once an alias has been closed every
// operation that returns an error reports ErrorIndexClosed, and that Mapping
// and Stats return nil.
func TestIndexAliasClosed(t *testing.T) {
	alias := NewIndexAlias()
	if closeErr := alias.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}

	// each entry runs one alias operation and yields the error it produced
	operations := []func() error{
		func() error { return alias.Index("a", "a") },
		func() error { return alias.Delete("a") },
		func() error { return alias.Batch(alias.NewBatch()) },
		func() error { _, e := alias.Document("a"); return e },
		func() error { _, e := alias.Fields(); return e },
		func() error { _, e := alias.GetInternal([]byte("a")); return e },
		func() error { return alias.SetInternal([]byte("a"), []byte("a")) },
		func() error { return alias.DeleteInternal([]byte("a")) },
	}
	for _, run := range operations {
		if got := run(); got != ErrorIndexClosed {
			t.Errorf("expected %v, got %v", ErrorIndexClosed, got)
		}
	}

	if m := alias.Mapping(); m != nil {
		t.Errorf("expected nil, got %v", m)
	}

	if st := alias.Stats(); st != nil {
		t.Errorf("expected nil, got %v", st)
	}

	// search and doc count are rejected as well on a closed alias
	request := NewSearchRequest(NewTermQuery("test"))
	if _, got := alias.Search(request); got != ErrorIndexClosed {
		t.Errorf("expected %v, got %v", ErrorIndexClosed, got)
	}

	if _, got := alias.DocCount(); got != ErrorIndexClosed {
		t.Errorf("expected %v, got %v", ErrorIndexClosed, got)
	}
}
// TestIndexAliasEmpty verifies that an alias without members reports
// ErrorAliasEmpty from its error-returning operations, nil from Mapping and
// Stats, and a zero document count without error.
func TestIndexAliasEmpty(t *testing.T) {
	alias := NewIndexAlias()

	// each entry runs one alias operation and yields the error it produced
	operations := []func() error{
		func() error { return alias.Index("a", "a") },
		func() error { return alias.Delete("a") },
		func() error { return alias.Batch(alias.NewBatch()) },
		func() error { _, e := alias.Document("a"); return e },
		func() error { _, e := alias.Fields(); return e },
		func() error { _, e := alias.GetInternal([]byte("a")); return e },
		func() error { return alias.SetInternal([]byte("a"), []byte("a")) },
		func() error { return alias.DeleteInternal([]byte("a")) },
	}
	for _, run := range operations {
		if got := run(); got != ErrorAliasEmpty {
			t.Errorf("expected %v, got %v", ErrorAliasEmpty, got)
		}
	}

	if m := alias.Mapping(); m != nil {
		t.Errorf("expected nil, got %v", m)
	}

	if st := alias.Stats(); st != nil {
		t.Errorf("expected nil, got %v", st)
	}

	// searching an empty alias is an error ...
	request := NewSearchRequest(NewTermQuery("test"))
	if _, got := alias.Search(request); got != ErrorAliasEmpty {
		t.Errorf("expected %v, got %v", ErrorAliasEmpty, got)
	}

	// ... but counting its documents is not
	count, countErr := alias.DocCount()
	if countErr != nil {
		t.Errorf("error getting alias doc count: %v", countErr)
	}
	if count != 0 {
		t.Errorf("expected %d, got %d", 0, count)
	}
}
// TestIndexAliasMulti verifies an alias over two indexes: single-index
// operations are rejected with ErrorAliasMulti, Mapping and Stats are nil,
// Search merges both members' hits, and DocCount adds the members' counts.
func TestIndexAliasMulti(t *testing.T) {
	score1, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(1.0), 0)
	score2, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(2.0), 0)

	// singleHit builds the one-element hit list used by the stubs and by the
	// expected merged result.
	singleHit := func(id string, score float64, sortKey string) *search.DocumentMatch {
		return &search.DocumentMatch{
			ID:    id,
			Score: score,
			Sort:  []string{sortKey},
		}
	}
	// member builds a stub index with a fixed doc count and one search hit.
	member := func(count uint64, id string, score float64, sortKey string) *stubIndex {
		return &stubIndex{
			err:            nil,
			docCountResult: &count,
			searchResult: &SearchResult{
				Status: &SearchStatus{
					Total:      1,
					Successful: 1,
					Errors:     make(map[string]error),
				},
				Total:    1,
				Hits:     search.DocumentMatchCollection{singleHit(id, score, sortKey)},
				MaxScore: score,
			},
		}
	}

	ei1 := member(7, "a", 1.0, string(score1))
	ei2 := member(8, "b", 2.0, string(score2))

	alias := NewIndexAlias(ei1, ei2)

	// operations that only make sense against a single index
	operations := []func() error{
		func() error { return alias.Index("a", "a") },
		func() error { return alias.Delete("a") },
		func() error { return alias.Batch(alias.NewBatch()) },
		func() error { _, e := alias.Document("a"); return e },
		func() error { _, e := alias.Fields(); return e },
		func() error { _, e := alias.GetInternal([]byte("a")); return e },
		func() error { return alias.SetInternal([]byte("a"), []byte("a")) },
		func() error { return alias.DeleteInternal([]byte("a")) },
	}
	for _, run := range operations {
		if got := run(); got != ErrorAliasMulti {
			t.Errorf("expected %v, got %v", ErrorAliasMulti, got)
		}
	}

	if m := alias.Mapping(); m != nil {
		t.Errorf("expected nil, got %v", m)
	}

	if st := alias.Stats(); st != nil {
		t.Errorf("expected nil, got %v", st)
	}

	// now a few things that should work
	sr := NewSearchRequest(NewTermQuery("test"))
	expected := &SearchResult{
		Status: &SearchStatus{
			Total:      2,
			Successful: 2,
			Errors:     make(map[string]error),
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			singleHit("b", 2.0, string(score2)),
			singleHit("a", 1.0, string(score1)),
		},
		MaxScore: 2.0,
	}
	results, err := alias.Search(sr)
	if err != nil {
		t.Error(err)
	}
	// cheat and ensure that Took field matches since it involves time
	expected.Took = results.Took
	if !reflect.DeepEqual(results, expected) {
		t.Errorf("expected %#v, got %#v", expected, results)
	}

	count, err := alias.DocCount()
	if err != nil {
		t.Errorf("error getting alias doc count: %v", err)
	}
	wantCount := *ei1.docCountResult + *ei2.docCountResult
	if count != wantCount {
		t.Errorf("expected %d, got %d", wantCount, count)
	}
}
// TestMultiSearchNoError runs MultiSearch over two stub indexes that both
// succeed and expects a merged result with the higher-scoring hit first.
func TestMultiSearchNoError(t *testing.T) {
	score1, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(1.0), 0)
	score2, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(2.0), 0)

	// hit builds one document match attributed to the given index.
	hit := func(indexName, id string, score float64, sortKey string) *search.DocumentMatch {
		return &search.DocumentMatch{
			Index: indexName,
			ID:    id,
			Score: score,
			Sort:  []string{sortKey},
		}
	}
	// member builds a stub index that returns exactly one hit.
	member := func(indexName, id string, score float64, sortKey string) *stubIndex {
		return &stubIndex{err: nil, searchResult: &SearchResult{
			Status: &SearchStatus{
				Total:      1,
				Successful: 1,
				Errors:     make(map[string]error),
			},
			Total:    1,
			Hits:     search.DocumentMatchCollection{hit(indexName, id, score, sortKey)},
			MaxScore: score,
		}}
	}

	ei1 := member("1", "a", 1.0, string(score1))
	ei2 := member("2", "b", 2.0, string(score2))

	sr := NewSearchRequest(NewTermQuery("test"))
	expected := &SearchResult{
		Status: &SearchStatus{
			Total:      2,
			Successful: 2,
			Errors:     make(map[string]error),
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			hit("2", "b", 2.0, string(score2)),
			hit("1", "a", 1.0, string(score1)),
		},
		MaxScore: 2.0,
	}

	results, err := MultiSearch(context.Background(), sr, nil, ei1, ei2)
	if err != nil {
		t.Error(err)
	}
	// cheat and ensure that Took field matches since it involves time
	expected.Took = results.Took
	if !reflect.DeepEqual(results, expected) {
		t.Errorf("expected %#v, got %#v", expected, results)
	}
}
// TestMultiSearchSomeError
func TestMultiSearchSomeError(t *testing.T) {
ei1 := &stubIndex{name: "ei1", err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
Hits: search.DocumentMatchCollection{
{
ID: "a",
Score: 1.0,
},
},
Took: 1 * time.Second,
MaxScore: 1.0,
}}
ei2 := &stubIndex{name: "ei2", err: fmt.Errorf("deliberate error")}
sr := NewSearchRequest(NewTermQuery("test"))
res, err := MultiSearch(context.Background(), sr, nil, ei1, ei2)
if err != nil {
t.Errorf("expected no error, got %v", err)
}
if res.Status.Total != 2 {
t.Errorf("expected 2 indexes to be queried, got %d", res.Status.Total)
}
if res.Status.Failed != 1 {
t.Errorf("expected 1 index to fail, got %d", res.Status.Failed)
}
if res.Status.Successful != 1 {
t.Errorf("expected 1 index to be successful, got %d", res.Status.Successful)
}
if len(res.Status.Errors) != 1 {
t.Fatalf("expected 1 status error message, got %d", len(res.Status.Errors))
}
if res.Status.Errors["ei2"].Error() != "deliberate error" {
t.Errorf("expected ei2 index error message 'deliberate error', got '%s'", res.Status.Errors["ei2"])
}
}
// TestMultiSearchAllError runs MultiSearch over two stub indexes that both
// fail and expects both failures to be recorded in the status while the call
// itself returns no error.
// reproduces https://github.com/blevesearch/bleve/issues/126
func TestMultiSearchAllError(t *testing.T) {
	ei1 := &stubIndex{name: "ei1", err: fmt.Errorf("deliberate error")}
	ei2 := &stubIndex{name: "ei2", err: fmt.Errorf("deliberate error")}

	sr := NewSearchRequest(NewTermQuery("test"))
	res, err := MultiSearch(context.Background(), sr, nil, ei1, ei2)
	if err != nil {
		t.Errorf("expected no error, got %v", err)
	}

	status := res.Status
	if got := status.Total; got != 2 {
		t.Errorf("expected 2 indexes to be queried, got %d", got)
	}
	if got := status.Failed; got != 2 {
		t.Errorf("expected 2 indexes to fail, got %d", got)
	}
	if got := status.Successful; got != 0 {
		t.Errorf("expected 0 indexes to be successful, got %d", got)
	}
	if got := len(status.Errors); got != 2 {
		t.Fatalf("expected 2 status error messages, got %d", got)
	}
	if status.Errors["ei1"].Error() != "deliberate error" {
		t.Errorf("expected ei1 index error message 'deliberate error', got '%s'", status.Errors["ei1"])
	}
	if status.Errors["ei2"].Error() != "deliberate error" {
		t.Errorf("expected ei2 index error message 'deliberate error', got '%s'", status.Errors["ei2"])
	}
}
// TestMultiSearchSecondPage issues a paged request (built with 10 and 10 as
// its size/from arguments) through MultiSearch and has every child index
// verify that the request it receives starts at 0 and asks for 20 hits.
func TestMultiSearchSecondPage(t *testing.T) {
	// validateChild fails the child search when the rewritten request does
	// not have the expected From/Size.
	validateChild := func(sr *SearchRequest) error {
		switch {
		case sr.From != 0:
			return fmt.Errorf("child request from should be 0")
		case sr.Size != 20:
			return fmt.Errorf("child request size should be 20")
		default:
			return nil
		}
	}

	// newChild builds a stub that validates the request and returns an
	// otherwise empty successful result.
	newChild := func() *stubIndex {
		return &stubIndex{
			searchResult: &SearchResult{
				Status: &SearchStatus{
					Total:      1,
					Successful: 1,
					Errors:     make(map[string]error),
				},
			},
			checkRequest: validateChild,
		}
	}
	ei1 := newChild()
	ei2 := newChild()

	sr := NewSearchRequestOptions(NewTermQuery("test"), 10, 10, false)
	if _, err := MultiSearch(context.Background(), sr, nil, ei1, ei2); err != nil {
		t.Errorf("unexpected error %v", err)
	}
}
// TestMultiSearchTimeout tests simple timeout cases
// 1. all searches finish successfully before timeout
// 2. no searchers finish before the timeout
// 3. no searches finish before cancellation
//
// Both stub indexes take 50ms to answer unless the context stored in ctx
// finishes first, in which case they fail with the context's error.
func TestMultiSearchTimeout(t *testing.T) {
	score1, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(1.0), 0)
	score2, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(2.0), 0)
	var ctx context.Context

	// waitOrAbort simulates the 50ms search. It reads ctx when invoked, so it
	// always observes the context of the scenario currently running.
	waitOrAbort := func(req *SearchRequest) error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(50 * time.Millisecond):
			return nil
		}
	}
	// slowIndex builds a stub that answers with one hit after waitOrAbort.
	slowIndex := func(name, indexName, id string, score float64, sortKey string) *stubIndex {
		return &stubIndex{
			name:         name,
			checkRequest: waitOrAbort,
			err:          nil,
			searchResult: &SearchResult{
				Status: &SearchStatus{
					Total:      1,
					Successful: 1,
					Errors:     make(map[string]error),
				},
				Total: 1,
				Hits: []*search.DocumentMatch{
					{
						Index: indexName,
						ID:    id,
						Score: score,
						Sort:  []string{sortKey},
					},
				},
				MaxScore: score,
			},
		}
	}
	ei1 := slowIndex("ei1", "1", "a", 1.0, string(score1))
	ei2 := slowIndex("ei2", "2", "b", 2.0, string(score2))

	// checkStatus compares the status counters and reports the counter that
	// was actually checked together with the value that was expected.
	checkStatus := func(res *SearchResult, total, successful, failed int) {
		t.Helper()
		if res.Status.Total != total {
			t.Errorf("expected %d total, got %d", total, res.Status.Total)
		}
		if res.Status.Successful != successful {
			t.Errorf("expected %d success, got %d", successful, res.Status.Successful)
		}
		if res.Status.Failed != failed {
			t.Errorf("expected %d failed, got %d", failed, res.Status.Failed)
		}
	}
	// checkBothFailed asserts that both indexes failed with want.
	checkBothFailed := func(res *SearchResult, want error) {
		t.Helper()
		if len(res.Status.Errors) != 2 {
			t.Errorf("expected 2 errors, got %v", res.Status.Errors)
			return
		}
		for _, name := range []string{"ei1", "ei2"} {
			got := res.Status.Errors[name]
			if got == nil || got.Error() != want.Error() {
				t.Errorf("expected err for '%s' to be '%s' got '%v'", name, want.Error(), got)
			}
		}
	}

	// first run with absurdly long time out, should succeed
	var cancel context.CancelFunc
	ctx, cancel = context.WithTimeout(context.Background(), 10*time.Second)
	defer cancel()
	query := NewTermQuery("test")
	sr := NewSearchRequest(query)
	res, err := MultiSearch(ctx, sr, nil, ei1, ei2)
	if err != nil {
		// res cannot be inspected safely after a failed call
		t.Fatalf("expected no error, got %v", err)
	}
	checkStatus(res, 2, 2, 0)
	if len(res.Status.Errors) != 0 {
		t.Errorf("expected 0 errors, got %v", res.Status.Errors)
	}

	// now run a search again with an absurdly low timeout (should timeout)
	ctx, cancel = context.WithTimeout(context.Background(), 1*time.Microsecond)
	defer cancel()
	res, err = MultiSearch(ctx, sr, nil, ei1, ei2)
	if err != nil {
		t.Fatalf("expected no error, got %v", err)
	}
	checkStatus(res, 2, 0, 2)
	checkBothFailed(res, context.DeadlineExceeded)

	// now run a search again with a normal timeout, but cancel it first
	ctx, cancel = context.WithTimeout(context.Background(), 5*time.Second)
	cancel()
	res, err = MultiSearch(ctx, sr, nil, ei1, ei2)
	if err != nil {
		t.Fatalf("expected no error, got %v", err)
	}
	checkStatus(res, 2, 0, 2)
	checkBothFailed(res, context.Canceled)
}
// TestMultiSearchTimeoutPartial tests the case where some indexes exceed
// the timeout, while others complete successfully. ei1 and ei2 answer
// immediately; ei3 blocks until the context is done and then fails with the
// context's error.
func TestMultiSearchTimeoutPartial(t *testing.T) {
	score1, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(1.0), 0)
	score2, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(2.0), 0)
	score3, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(3.0), 0)
	var ctx context.Context

	// member builds a stub index that returns one hit; check, when non-nil,
	// runs before the hit is returned.
	member := func(name, indexName, id string, score float64, sortKey string,
		check func(*SearchRequest) error) *stubIndex {
		return &stubIndex{
			name:         name,
			checkRequest: check,
			err:          nil,
			searchResult: &SearchResult{
				Status: &SearchStatus{
					Total:      1,
					Successful: 1,
					Errors:     make(map[string]error),
				},
				Total: 1,
				Hits: []*search.DocumentMatch{
					{
						Index: indexName,
						ID:    id,
						Score: score,
						Sort:  []string{sortKey},
					},
				},
				MaxScore: score,
			},
		}
	}

	ei1 := member("ei1", "1", "a", 1.0, string(score1), nil)
	ei2 := member("ei2", "2", "b", 2.0, string(score2), nil)
	ei3 := member("ei3", "3", "c", 3.0, string(score3), func(req *SearchRequest) error {
		// never finishes on its own: wait for the deadline/cancellation
		<-ctx.Done()
		return ctx.Err()
	})

	// ei3 only returns once the context ends, so a 25ms timeout must produce
	// partial results from ei1 and ei2 plus a deadline error for ei3
	var cancel context.CancelFunc
	ctx, cancel = context.WithTimeout(context.Background(), 25*time.Millisecond)
	defer cancel()
	sr := NewSearchRequest(NewTermQuery("test"))

	expected := &SearchResult{
		Status: &SearchStatus{
			Total:      3,
			Successful: 2,
			Failed:     1,
			Errors: map[string]error{
				"ei3": context.DeadlineExceeded,
			},
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			{
				Index: "2",
				ID:    "b",
				Score: 2.0,
				Sort:  []string{string(score2)},
			},
			{
				Index: "1",
				ID:    "a",
				Score: 1.0,
				Sort:  []string{string(score1)},
			},
		},
		MaxScore: 2.0,
	}

	res, err := MultiSearch(ctx, sr, nil, ei1, ei2, ei3)
	if err != nil {
		t.Fatalf("expected no err, got %v", err)
	}
	// Took depends on wall-clock time, so copy it before comparing
	expected.Took = res.Took
	if !reflect.DeepEqual(res, expected) {
		t.Errorf("expected %#v, got %#v", expected, res)
	}
}
// TestIndexAliasMultipleLayer searches through an alias of two aliases, each
// holding one fast and one slow stub index, and expects the statuses and hits
// of all four leaf indexes to be merged into one result.
func TestIndexAliasMultipleLayer(t *testing.T) {
	score1, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(1.0), 0)
	score2, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(2.0), 0)
	score3, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(3.0), 0)
	score4, _ := numeric.NewPrefixCodedInt64(numeric.Float64ToInt64(4.0), 0)
	var ctx context.Context

	// slow simulates a 250ms search that aborts with the context's error if
	// the context finishes first.
	slow := func(req *SearchRequest) error {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-time.After(250 * time.Millisecond):
			return nil
		}
	}
	// member builds a stub index that returns one hit; check, when non-nil,
	// runs before the hit is returned.
	member := func(name, indexName, id string, score float64, sortKey string,
		check func(*SearchRequest) error) *stubIndex {
		return &stubIndex{
			name:         name,
			checkRequest: check,
			err:          nil,
			searchResult: &SearchResult{
				Status: &SearchStatus{
					Total:      1,
					Successful: 1,
					Errors:     make(map[string]error),
				},
				Total: 1,
				Hits: []*search.DocumentMatch{
					{
						Index: indexName,
						ID:    id,
						Score: score,
						Sort:  []string{sortKey},
					},
				},
				MaxScore: score,
			},
		}
	}

	ei1 := member("ei1", "1", "a", 1.0, string(score1), nil)
	ei2 := member("ei2", "2", "b", 2.0, string(score2), slow)
	ei3 := member("ei3", "3", "c", 3.0, string(score3), slow)
	ei4 := member("ei4", "4", "d", 4.0, string(score4), nil)

	alias1 := NewIndexAlias(ei1, ei2)
	alias2 := NewIndexAlias(ei3, ei4)
	aliasTop := NewIndexAlias(alias1, alias2)

	// ei2 and ei3 need 250ms, far longer than the 25ms timeout below, so
	// they fail with a deadline error; the search across aliasTop should
	// still return the hits of ei1 and ei4 and report all 4 indexes in the
	// status total
	var cancel context.CancelFunc
	ctx, cancel = context.WithTimeout(context.Background(), 25*time.Millisecond)
	defer cancel()
	sr := NewSearchRequest(NewTermQuery("test"))

	expected := &SearchResult{
		Status: &SearchStatus{
			Total:      4,
			Successful: 2,
			Failed:     2,
			Errors: map[string]error{
				"ei2": context.DeadlineExceeded,
				"ei3": context.DeadlineExceeded,
			},
		},
		Total: 2,
		Hits: search.DocumentMatchCollection{
			{
				Index: "4",
				ID:    "d",
				Score: 4.0,
				Sort:  []string{string(score4)},
			},
			{
				Index: "1",
				ID:    "a",
				Score: 1.0,
				Sort:  []string{string(score1)},
			},
		},
		MaxScore: 4.0,
	}

	res, err := aliasTop.SearchInContext(ctx, sr)
	if err != nil {
		t.Fatalf("expected no err, got %v", err)
	}
	// Took depends on wall-clock time, so copy it before comparing
	expected.Took = res.Took
	if !reflect.DeepEqual(res, expected) {
		t.Errorf("expected %#v, got %#v", expected, res)
	}
}
// TestMultiSearchCustomSort
func TestMultiSearchCustomSort(t *testing.T) {
ei1 := &stubIndex{err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 2,
Hits: search.DocumentMatchCollection{
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{"albert"},
},
{
Index: "1",
ID: "b",
Score: 2.0,
Sort: []string{"crown"},
},
},
MaxScore: 2.0,
}}
ei2 := &stubIndex{err: nil, searchResult: &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 2,
Hits: search.DocumentMatchCollection{
{
Index: "2",
ID: "c",
Score: 2.5,
Sort: []string{"frank"},
},
{
Index: "2",
ID: "d",
Score: 3.0,
Sort: []string{"zombie"},
},
},
MaxScore: 3.0,
}}
sr := NewSearchRequest(NewTermQuery("test"))
sr.Explain = true
sr.SortBy([]string{"name"})
expected := &SearchResult{
Status: &SearchStatus{
Total: 2,
Successful: 2,
Errors: make(map[string]error),
},
Request: sr,
Total: 4,
Hits: search.DocumentMatchCollection{
{
Index: "1",
ID: "a",
Score: 1.0,
Sort: []string{"albert"},
},
{
Index: "1",
ID: "b",
Score: 2.0,
Sort: []string{"crown"},
},
{
Index: "2",
ID: "c",
Score: 2.5,
Sort: []string{"frank"},
},
{
Index: "2",
ID: "d",
Score: 3.0,
Sort: []string{"zombie"},
},
},
MaxScore: 3.0,
}
results, err := MultiSearch(context.Background(), sr, nil, ei1, ei2)
if err != nil {
t.Error(err)
}
// cheat and ensure that Took field matches since it involves time
expected.Took = results.Took
if !reflect.DeepEqual(results, expected) {
t.Errorf("expected %v, got %v", expected, results)
}
}
// stubIndex is an Index impl for which all operations
// return the configured error value, unless the
// corresponding operation result value has been
// set, in which case that is returned instead
type stubIndex struct {
// name is what Name returns and what SetName overwrites
name string
// err is the error returned by every operation that has no canned result
err error
// searchResult, when non-nil, is returned by SearchInContext instead of err
searchResult *SearchResult
// documentResult, when non-nil, is returned by Document instead of err
documentResult *document.Document
// docCountResult, when non-nil, is dereferenced and returned by DocCount
// instead of (0, err)
docCountResult *uint64
// checkRequest, when non-nil, is called by SearchInContext before any result
// is produced; a non-nil error from it becomes the search error
checkRequest func(*SearchRequest) error
}
// Index ignores its arguments and returns the configured error.
func (i *stubIndex) Index(id string, data interface{}) error {
return i.err
}
// Delete ignores its argument and returns the configured error.
func (i *stubIndex) Delete(id string) error {
return i.err
}
// Batch ignores the batch and returns the configured error.
func (i *stubIndex) Batch(b *Batch) error {
return i.err
}
// Document returns the canned document when one has been configured;
// otherwise it returns a nil document together with the configured error.
// The id argument is ignored.
func (i *stubIndex) Document(id string) (index.Document, error) {
	if i.documentResult == nil {
		return nil, i.err
	}
	return i.documentResult, nil
}
// DocCount returns the canned document count when one has been configured;
// otherwise it returns zero together with the configured error.
func (i *stubIndex) DocCount() (uint64, error) {
	if i.docCountResult == nil {
		return 0, i.err
	}
	return *i.docCountResult, nil
}
// Search runs SearchInContext with a background context.
func (i *stubIndex) Search(req *SearchRequest) (*SearchResult, error) {
return i.SearchInContext(context.Background(), req)
}
// SearchInContext first runs the optional checkRequest hook and fails with
// its error if it reports one. Otherwise it returns the canned search result
// when one has been configured, or a nil result with the configured error.
// The ctx argument itself is not consulted here.
func (i *stubIndex) SearchInContext(ctx context.Context, req *SearchRequest) (*SearchResult, error) {
	if hook := i.checkRequest; hook != nil {
		if hookErr := hook(req); hookErr != nil {
			return nil, hookErr
		}
	}
	if i.searchResult == nil {
		return nil, i.err
	}
	return i.searchResult, nil
}
// Fields returns no field names and the configured error.
func (i *stubIndex) Fields() ([]string, error) {
return nil, i.err
}
// FieldDict returns a nil dictionary and the configured error.
func (i *stubIndex) FieldDict(field string) (index.FieldDict, error) {
return nil, i.err
}
// FieldDictRange returns a nil dictionary and the configured error; the range
// arguments are ignored.
func (i *stubIndex) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
return nil, i.err
}
// FieldDictPrefix returns a nil dictionary and the configured error; the
// prefix argument is ignored.
func (i *stubIndex) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
return nil, i.err
}
// Close returns the configured error and changes no state.
func (i *stubIndex) Close() error {
return i.err
}
// Mapping always returns nil; the stub carries no index mapping.
func (i *stubIndex) Mapping() mapping.IndexMapping {
return nil
}
// Stats always returns nil; the stub tracks no statistics.
func (i *stubIndex) Stats() *IndexStat {
return nil
}
// StatsMap always returns a nil map.
func (i *stubIndex) StatsMap() map[string]interface{} {
return nil
}
// GetInternal returns a nil value and the configured error; the key is
// ignored.
func (i *stubIndex) GetInternal(key []byte) ([]byte, error) {
return nil, i.err
}
// SetInternal stores nothing and returns the configured error.
func (i *stubIndex) SetInternal(key, val []byte) error {
return i.err
}
// DeleteInternal deletes nothing and returns the configured error.
func (i *stubIndex) DeleteInternal(key []byte) error {
return i.err
}
// Advanced returns a nil low-level index and a nil error, regardless of the
// configured error.
func (i *stubIndex) Advanced() (index.Index, error) {
return nil, nil
}
// NewBatch returns a pointer to a zero-value Batch.
func (i *stubIndex) NewBatch() *Batch {
return &Batch{}
}
// Name returns the stub's configured name.
func (i *stubIndex) Name() string {
return i.name
}
// SetName replaces the stub's name.
func (i *stubIndex) SetName(name string) {
i.name = name
}
================================================
FILE: index_impl.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"fmt"
"io"
"os"
"path/filepath"
"regexp"
"strconv"
"sync"
"sync/atomic"
"time"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/collector"
"github.com/blevesearch/bleve/v2/search/facet"
"github.com/blevesearch/bleve/v2/search/highlight"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/s2"
)
// indexImpl pairs an underlying index.Index with the mapping used to turn
// objects into documents, plus the bookkeeping (meta, stats, open flag)
// needed by the methods in this file.
type indexImpl struct {
// path is the location given to newIndexUsing/openIndexUsing; newIndexUsing
// skips saving index meta when it is empty.
path string
// name starts out equal to path and can be replaced through SetName.
name string
// meta carries the index type, storage name and store config.
meta *indexMeta
// i is the underlying index implementation all operations delegate to.
i index.Index
// m is the mapping in use by this index.
m mapping.IndexMapping
// mutex guards open: operations hold the read lock, Close takes the write lock.
mutex sync.RWMutex
// open is set once construction succeeds and cleared by Close.
open bool
// stats holds counters such as searches and searchTime.
stats *IndexStat
}
// storePath is the name of the sub-directory, below the index path, that
// indexStorePath hands to the underlying store as its "path".
const storePath = "store"
// Context keys under which callers can register callbacks that
// SearchInContext invokes around query execution.
const (
// SearchQueryStartCallbackKey holds a SearchQueryStartCallbackFn; it is
// called with the estimated memory need before collection starts.
SearchQueryStartCallbackKey search.ContextKey = "_search_query_start_callback_key"
// SearchQueryEndCallbackKey holds a SearchQueryEndCallbackFn; it is
// deferred and called with the same estimate once the search returns.
SearchQueryEndCallbackKey search.ContextKey = "_search_query_end_callback_key"
)
// Callback signatures looked up in the search context by SearchInContext.
type (
// SearchQueryStartCallbackFn receives the estimate from memNeededForSearch;
// a non-nil error makes SearchInContext return that error without collecting.
SearchQueryStartCallbackFn func(size uint64) error
// SearchQueryEndCallbackFn receives the same estimate when the search
// returns; SearchInContext discards its error.
SearchQueryEndCallbackFn func(size uint64) error
)
// indexStorePath returns path with the OS path separator and the store
// directory name appended. No cleaning or normalisation is performed.
func indexStorePath(path string) string {
	sep := string(os.PathSeparator)
	return path + sep + storePath
}
// newIndexUsing creates a brand new index of the given indexType backed by
// kvstore. The mapping is validated first, index meta is saved when path is
// non-empty, the underlying index is constructed and opened, and the mapping
// is persisted as an internal value before the index is marked open and
// registered with indexStats.
//
// kvconfig may be nil; when supplied it is modified in place ("path",
// "create_if_missing", "error_if_exists"). An empty kvstore is rejected.
func newIndexUsing(path string, mapping mapping.IndexMapping, indexType string, kvstore string, kvconfig map[string]interface{}) (*indexImpl, error) {
	// refuse an invalid mapping before anything is written
	if err := mapping.Validate(); err != nil {
		return nil, err
	}

	if kvconfig == nil {
		kvconfig = map[string]interface{}{}
	}

	if kvstore == "" {
		return nil, fmt.Errorf("bleve not configured for file based indexing")
	}

	rv := indexImpl{
		path: path,
		name: path,
		m:    mapping,
		meta: newIndexMeta(indexType, kvstore, kvconfig),
	}
	rv.stats = &IndexStat{i: &rv}

	// from here on success looks likely, so the index meta gets saved
	// (only when there is a path to save it under)
	if path == "" {
		kvconfig["path"] = ""
	} else {
		if err := rv.meta.Save(path); err != nil {
			return nil, err
		}
		kvconfig["create_if_missing"] = true
		kvconfig["error_if_exists"] = true
		kvconfig["path"] = indexStorePath(path)
	}

	// construct and open the underlying index
	construct := registry.IndexTypeConstructorByName(rv.meta.IndexType)
	if construct == nil {
		return nil, ErrorUnknownIndexType
	}

	var err error
	rv.i, err = construct(rv.meta.Storage, kvconfig, Config.analysisQueue)
	if err != nil {
		return nil, err
	}
	if err = rv.i.Open(); err != nil {
		return nil, err
	}
	// if any later step bails out, the opened index is closed again
	defer func() {
		if !rv.open {
			rv.i.Close()
		}
	}()

	// persist the mapping inside the index
	mappingBytes, err := util.MarshalJSON(mapping)
	if err != nil {
		return nil, err
	}
	if err = rv.i.SetInternal(util.MappingInternalKey, mappingBytes); err != nil {
		return nil, err
	}

	// mark the index as open
	rv.mutex.Lock()
	defer rv.mutex.Unlock()
	rv.open = true
	indexStats.Register(&rv)
	return &rv, nil
}
// openIndexUsing opens the existing index stored at path.
//
// The index meta is read from path, the store config is taken from it and
// overlaid with every entry of runtimeConfig, and the underlying index is
// constructed. The mapping persisted inside the index is then loaded and
// validated.
//
// When runtimeConfig carries an "updated_mapping" entry (a non-empty JSON
// string), the underlying index must implement index.UpdateIndex: only its
// meta is opened at first, the fields removed by the new mapping are computed
// with DeletedFields and applied through UpdateFields, and only then is the
// index opened with the updated mapping in effect.
//
// On success the index is marked open and registered with indexStats.
func openIndexUsing(path string, runtimeConfig map[string]interface{}) (rv *indexImpl, err error) {
	rv = &indexImpl{
		path: path,
		name: path,
	}
	rv.stats = &IndexStat{i: rv}

	rv.meta, err = openIndexMeta(path)
	if err != nil {
		return nil, err
	}

	// backwards compatibility if index type is missing
	if rv.meta.IndexType == "" {
		rv.meta.IndexType = upsidedown.Name
	}

	var um *mapping.IndexMappingImpl
	var umBytes []byte

	storeConfig := rv.meta.Config
	if storeConfig == nil {
		storeConfig = map[string]interface{}{}
	}

	storeConfig["path"] = indexStorePath(path)
	storeConfig["create_if_missing"] = false
	storeConfig["error_if_exists"] = false
	// runtime entries override whatever was stored in the meta config
	for rck, rcv := range runtimeConfig {
		storeConfig[rck] = rcv
		if rck == "updated_mapping" {
			if val, ok := rcv.(string); ok {
				if len(val) == 0 {
					return nil, fmt.Errorf("updated_mapping is empty")
				}
				umBytes = []byte(val)

				err = util.UnmarshalJSON(umBytes, &um)
				if err != nil {
					// report the offending mapping text, not the config key
					return nil, fmt.Errorf("error parsing updated_mapping into JSON: %v\nmapping contents:\n%v", err, val)
				}
			} else {
				return nil, fmt.Errorf("updated_mapping not of type string")
			}
		}
	}

	// open the index
	indexTypeConstructor := registry.IndexTypeConstructorByName(rv.meta.IndexType)
	if indexTypeConstructor == nil {
		return nil, ErrorUnknownIndexType
	}

	rv.i, err = indexTypeConstructor(rv.meta.Storage, storeConfig, Config.analysisQueue)
	if err != nil {
		return nil, err
	}

	var ui index.UpdateIndex
	if um != nil {
		var ok bool
		ui, ok = rv.i.(index.UpdateIndex)
		if !ok {
			return nil, fmt.Errorf("updated mapping present for unupdatable index")
		}

		// Load the meta data from bolt so that we can read the current index
		// mapping to compare with
		err = ui.OpenMeta()
		if err != nil {
			return nil, err
		}
	} else {
		err = rv.i.Open()
		if err != nil {
			return nil, err
		}
		// close the index again if a later step fails before it is marked open
		defer func(rv *indexImpl) {
			if !rv.open {
				rv.i.Close()
			}
		}(rv)
	}

	// now load the mapping
	indexReader, err := rv.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		// a reader close failure is only reported when nothing else failed
		if cerr := indexReader.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	mappingBytes, err := indexReader.GetInternal(util.MappingInternalKey)
	if err != nil {
		return nil, err
	}

	var im *mapping.IndexMappingImpl
	err = util.UnmarshalJSON(mappingBytes, &im)
	if err != nil {
		return nil, fmt.Errorf("error parsing mapping JSON: %v\nmapping contents:\n%s", err, string(mappingBytes))
	}

	// validate the mapping
	err = im.Validate()
	if err != nil {
		// no longer return usable index on error because there
		// is a chance the index is not open at this stage
		return nil, err
	}

	// Validate and update the index with the new mapping
	if um != nil && ui != nil {
		err = um.Validate()
		if err != nil {
			return nil, err
		}

		fieldInfo, err := DeletedFields(im, um)
		if err != nil {
			return nil, err
		}

		err = ui.UpdateFields(fieldInfo, umBytes)
		if err != nil {
			return nil, err
		}
		im = um

		err = rv.i.Open()
		if err != nil {
			return nil, err
		}
		defer func(rv *indexImpl) {
			if !rv.open {
				rv.i.Close()
			}
		}(rv)
	}

	// mark the index as open
	rv.mutex.Lock()
	defer rv.mutex.Unlock()
	rv.open = true

	rv.m = im
	indexStats.Register(rv)
	return rv, err
}
// Advanced exposes the underlying index implementation wrapped by this
// index. The returned error is always nil.
func (i *indexImpl) Advanced() (index.Index, error) {
	underlying := i.i
	return underlying, nil
}
// Mapping returns the IndexMapping this index uses when mapping objects
// into documents.
func (i *indexImpl) Mapping() mapping.IndexMapping {
	current := i.m
	return current
}
// Index maps data through the index mapping into a document identified by id
// and writes that document to the underlying index.
//
// It returns ErrorEmptyID for an empty id and ErrorIndexClosed when the index
// is not open; mapping and update failures are returned as-is.
func (i *indexImpl) Index(id string, data interface{}) (err error) {
	if id == "" {
		return ErrorEmptyID
	}

	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}

	i.FireIndexEvent()

	doc := document.NewDocument(id)
	if err = i.m.MapDocument(doc, data); err != nil {
		return err
	}
	return i.i.Update(doc)
}
// IndexSynonym indexes a synonym definition under the given id as part of the
// named collection. Synonym definitions describe term relationships that are
// used for query expansion in searches.
//
// It returns ErrorEmptyID for an empty id, ErrorIndexClosed when the index is
// not open, ErrorSynonymSearchNotSupported when the mapping is not a
// mapping.SynonymMapping, and otherwise any validation, mapping or update error.
func (i *indexImpl) IndexSynonym(id string, collection string, definition *SynonymDefinition) error {
	if id == "" {
		return ErrorEmptyID
	}

	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}

	i.FireIndexEvent()

	synMap, supported := i.m.(mapping.SynonymMapping)
	if !supported {
		return ErrorSynonymSearchNotSupported
	}

	if verr := definition.Validate(); verr != nil {
		return verr
	}

	doc := document.NewSynonymDocument(id)
	if merr := synMap.MapSynonymDocument(doc, collection, definition.Input, definition.Synonyms); merr != nil {
		return merr
	}
	return i.i.Update(doc)
}
// IndexAdvanced writes an already-built document straight to the underlying
// index, bypassing the mapping. It returns ErrorEmptyID when the document has
// no ID and ErrorIndexClosed when the index is not open.
func (i *indexImpl) IndexAdvanced(doc *document.Document) (err error) {
	if doc.ID() == "" {
		return ErrorEmptyID
	}

	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if i.open {
		return i.i.Update(doc)
	}
	return ErrorIndexClosed
}
// Delete removes the entries stored for id from the underlying index.
// It returns ErrorEmptyID for an empty id and ErrorIndexClosed when the index
// is not open.
func (i *indexImpl) Delete(id string) (err error) {
	if id == "" {
		return ErrorEmptyID
	}

	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if i.open {
		return i.i.Delete(id)
	}
	return ErrorIndexClosed
}
// Batch applies all Index and Delete operations accumulated in b in a single
// call to the underlying index, which is often considerably faster than
// issuing them one by one. It returns ErrorIndexClosed when the index is not
// open.
func (i *indexImpl) Batch(b *Batch) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if i.open {
		return i.i.Batch(b.internal)
	}
	return ErrorIndexClosed
}
// Document looks up the stored fields of the document with the given id and
// returns them assembled into a Document object.
//
// It returns ErrorIndexClosed when the index is not open. A failure to close
// the reader is reported only when the lookup itself succeeded.
func (i *indexImpl) Document(id string) (doc index.Document, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		cerr := reader.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	if doc, err = reader.Document(id); err != nil {
		return nil, err
	}
	return doc, nil
}
// DocCount returns the number of documents in the index.
//
// It returns ErrorIndexClosed when the index is not open. A failure to obtain
// a reader is wrapped (so the cause stays reachable through errors.Is and
// errors.As) and a failure to close the reader is reported only when the
// count itself succeeded.
func (i *indexImpl) DocCount() (count uint64, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return 0, ErrorIndexClosed
	}

	// open a reader for this count
	indexReader, err := i.i.Reader()
	if err != nil {
		return 0, fmt.Errorf("error opening index reader %w", err)
	}
	defer func() {
		if cerr := indexReader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	count, err = indexReader.DocCount()
	return count, err
}
// Search runs req against the index using a background context and returns
// the resulting SearchResult or an error. See SearchInContext.
func (i *indexImpl) Search(req *SearchRequest) (sr *SearchResult, err error) {
	ctx := context.Background()
	return i.SearchInContext(ctx, req)
}
// Sizes reported by Size() on zero-valued search/document structs. They are
// filled in once by init and used by memNeededForSearch as per-item overheads.
var (
documentMatchEmptySize int
searchContextEmptySize int
facetResultEmptySize int
documentEmptySize int
)
// init records the Size() of zero-valued search and document structs so that
// memNeededForSearch can use them as per-item overheads.
func init() {
	documentMatchEmptySize = new(search.DocumentMatch).Size()
	searchContextEmptySize = new(search.SearchContext).Size()
	facetResultEmptySize = new(search.FacetResult).Size()
	documentEmptySize = new(document.Document).Size()
}
// memNeededForSearch estimates, in bytes, the RAM required to execute req
// with the given searcher and top-N collector. The figure is the sum of the
// collector and searcher sizes, the pre-allocated document match pool, the
// result set, the static SearchResult/SearchStatus overhead, one facet result
// per requested facet and, when fields or highlighting are requested, one
// empty document per hit.
func memNeededForSearch(req *SearchRequest,
	searcher search.Searcher,
	topnCollector *collector.TopNCollector,
) uint64 {
	// Size + From => number of hits
	hits := req.Size + req.From

	// the collector caps how much backing store it pre-allocates
	backingSize := hits + 1
	if hits > collector.PreAllocSizeSkipCap {
		backingSize = collector.PreAllocSizeSkipCap + 1
	}
	numDocMatches := backingSize + searcher.DocumentMatchPoolSize()

	// overhead, size in bytes from collector
	estimate := topnCollector.Size()

	// pre-allocing DocumentMatchPool
	estimate += searchContextEmptySize + numDocMatches*documentMatchEmptySize

	// searcher overhead
	estimate += searcher.Size()

	// overhead from results, lowestMatchOutsideResults
	estimate += (numDocMatches + 1) * documentMatchEmptySize

	// additional overhead from SearchResult
	estimate += reflectStaticSizeSearchResult + reflectStaticSizeSearchStatus

	// overhead from facet results
	if req.Facets != nil {
		estimate += len(req.Facets) * facetResultEmptySize
	}

	// highlighting, store
	if req.Highlight != nil || len(req.Fields) > 0 {
		estimate += hits * documentEmptySize
	}

	return uint64(estimate)
}
// preSearch runs the first phase of a two-phase search against reader and
// returns what it gathered inside a SearchResult:
//   - Hits: the KNN hits, when the request has a KNN part;
//   - SynonymResult: synonyms extracted for the query, when both the mapping
//     and the reader support synonyms;
//   - BM25Stats: the document count and per-field cardinalities for the
//     fields used by the query, when BM25 is enabled on the mapping.
//
// Synonym and BM25 gathering is skipped entirely for a match-none query.
func (i *indexImpl) preSearch(ctx context.Context, req *SearchRequest, reader index.IndexReader) (*SearchResult, error) {
	var (
		knnHits          []*search.DocumentMatch
		fts              search.FieldTermSynonymMap
		docCount         uint64
		fieldCardinality map[string]int
		err              error
	)

	if requestHasKNN(req) {
		if knnHits, err = i.runKnnCollector(ctx, req, reader, true); err != nil {
			return nil, err
		}
	}

	if !isMatchNoneQuery(req.Query) {
		if synMap, hasSynonyms := i.m.(mapping.SynonymMapping); hasSynonyms {
			if synReader, canRead := reader.(index.ThesaurusReader); canRead {
				if fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts); err != nil {
					return nil, err
				}
			}
		}

		if isBM25Enabled(i.m) {
			fieldCardinality = make(map[string]int)
			if docCount, err = reader.DocCount(); err != nil {
				return nil, err
			}

			fs, err := query.ExtractFields(req.Query, i.m, search.NewFieldSet())
			if err != nil {
				return nil, err
			}
			// cardinalities can only be collected from a BM25-capable reader
			if bm25Reader, ok := reader.(index.BM25Reader); ok {
				for field := range fs {
					fieldCardinality[field], err = bm25Reader.FieldCardinality(field)
					if err != nil {
						return nil, err
					}
				}
			}
		}
	}

	status := &SearchStatus{
		Total:      1,
		Successful: 1,
	}
	stats := &search.BM25Stats{
		DocCount:         float64(docCount),
		FieldCardinality: fieldCardinality,
	}
	return &SearchResult{
		Status:        status,
		Hits:          knnHits,
		SynonymResult: fts,
		BM25Stats:     stats,
	}, nil
}
// SearchInContext executes a search request operation within the provided
// Context. Returns a SearchResult object or an error.
//
// Outline of the steps performed below:
//  1. take the read lock, verify the index is open and obtain a reader;
//  2. when score fusion is requested at this level, create a rescorer and
//     prepare the request for it;
//  3. layer callbacks into ctx (scoring model, IO stats, geo buffer pool) and
//     flag nested mode when the mapping has nested fields;
//  4. if ctx carries search.PreSearchKey, run only preSearch and return;
//  5. otherwise build the top-N collector, consume req.PreSearchData, run the
//     KNN collector and synonym extraction where still needed, build the
//     searcher and facet builders, and collect;
//  6. load stored fields / highlights for each hit, update the search stats,
//     undo a SearchBefore reversal and assemble the SearchResult, rescoring
//     it when a rescorer is active.
//
// The deferred functions close the reader and searcher (their errors are
// reported only when err is otherwise nil) and assign the accumulated search
// cost to the named result sr when it is non-nil.
func (i *indexImpl) SearchInContext(ctx context.Context, req *SearchRequest) (sr *SearchResult, err error) {
i.mutex.RLock()
defer i.mutex.RUnlock()
searchStart := time.Now()
if !i.open {
return nil, ErrorIndexClosed
}
// open a reader for this search
indexReader, err := i.i.Reader()
if err != nil {
return nil, fmt.Errorf("error opening index reader %v", err)
}
defer func() {
if cerr := indexReader.Close(); err == nil && cerr != nil {
err = cerr
}
}()
// rescorer will be set if score fusion is supposed to happen
// at this alias (root alias), else will be nil
var rescorer *rescorer
if _, ok := ctx.Value(search.ScoreFusionKey).(bool); !ok {
// new context will be used in internal functions to collect data
// as suitable for hybrid search. Rescorer is used for rescoring
// using fusion algorithms.
if IsScoreFusionRequested(req) {
ctx = context.WithValue(ctx, search.ScoreFusionKey, true)
rescorer = newRescorer(req)
rescorer.prepareSearchRequest()
// NOTE(review): restoreSearchRequest is also called explicitly on the
// success path further down, so it runs twice there; presumably it is
// safe to call repeatedly - confirm.
defer rescorer.restoreSearchRequest()
}
}
// ------------------------------------------------------------------------------------------
// set up additional contexts for any search operation that will proceed from
// here, such as presearch, knn collector, topn collector etc.
// Scoring model callback to be used to get scoring model
scoringModelCallback := func() string {
if isBM25Enabled(i.m) {
return index.BM25Scoring
}
return index.DefaultScoringModel
}
ctx = context.WithValue(ctx, search.GetScoringModelCallbackKey,
search.GetScoringModelCallbackFn(scoringModelCallback))
// This callback and variable handles the tracking of bytes read
// 1. as part of creation of tfr and its Next() calls which is
// accounted by invoking this callback when the TFR is closed.
// 2. the docvalues portion (accounted in collector) and the retrieval
// of stored fields bytes (by LoadAndHighlightFields)
var totalSearchCost uint64
sendBytesRead := func(bytesRead uint64) {
totalSearchCost += bytesRead
}
// Ensure IO cost accounting and result cost assignment happen on all return paths
defer func() {
if sr != nil {
sr.Cost = totalSearchCost
}
if is, ok := indexReader.(*scorch.IndexSnapshot); ok {
is.UpdateIOStats(totalSearchCost)
}
search.RecordSearchCost(ctx, search.DoneM, 0)
}()
ctx = context.WithValue(ctx, search.SearchIOStatsCallbackKey, search.SearchIOStatsCallbackFunc(sendBytesRead))
// Geo buffer pool callback to be used for getting geo buffer pool
// (the pool is created lazily on the first call and then reused)
var bufPool *s2.GeoBufferPool
getBufferPool := func() *s2.GeoBufferPool {
if bufPool == nil {
bufPool = s2.NewGeoBufferPool(search.MaxGeoBufPoolSize, search.MinGeoBufPoolSize)
}
return bufPool
}
ctx = context.WithValue(ctx, search.GeoBufferPoolCallbackKey, search.GeoBufferPoolCallbackFunc(getBufferPool))
// check if the index mapping has any nested fields, which should force
// all collectors and searchers to be run in nested mode
if nm, ok := i.m.(mapping.NestedMapping); ok {
if nm.CountNested() > 0 {
ctx = context.WithValue(ctx, search.NestedSearchKey, true)
}
}
// ------------------------------------------------------------------------------------------
// presearch phase: only gather the presearch data and return it
if _, ok := ctx.Value(search.PreSearchKey).(bool); ok {
sr, err = i.preSearch(ctx, req, indexReader)
if err != nil {
return nil, err
}
// increment the search count here itself,
// since the presearch may already satisfy
// the search request
atomic.AddUint64(&i.stats.searches, 1)
// increment the search time stat here as well,
// since presearch is part of the overall search
// operation and should be included in the search
// time stat
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
return sr, nil
}
// SearchBefore is executed as a SearchAfter over the reversed sort order;
// the request is put back into its original shape after collection.
// NOTE(review): the error returns between this point and that reset leave
// req with the reversed sort and swapped SearchAfter/SearchBefore.
var reverseQueryExecution bool
if req.SearchBefore != nil {
reverseQueryExecution = true
req.Sort.Reverse()
req.SearchAfter = req.SearchBefore
req.SearchBefore = nil
}
coll, err := i.buildTopNCollector(ctx, req, indexReader)
if err != nil {
return nil, err
}
var knnHits []*search.DocumentMatch
var skipKNNCollector bool
var fts search.FieldTermSynonymMap
var skipSynonymCollector bool
var bm25Stats *search.BM25Stats
var ok bool
// data supplied by an earlier presearch phase replaces the corresponding
// local collection step; each entry must have the expected concrete type
if req.PreSearchData != nil {
for k, v := range req.PreSearchData {
switch k {
case search.KnnPreSearchDataKey:
if v != nil {
knnHits, ok = v.([]*search.DocumentMatch)
if !ok {
return nil, fmt.Errorf("knn preSearchData must be of type []*search.DocumentMatch")
}
skipKNNCollector = true
}
case search.SynonymPreSearchDataKey:
if v != nil {
fts, ok = v.(search.FieldTermSynonymMap)
if !ok {
return nil, fmt.Errorf("synonym preSearchData must be of type search.FieldTermSynonymMap")
}
skipSynonymCollector = true
}
case search.BM25PreSearchDataKey:
if v != nil {
bm25Stats, ok = v.(*search.BM25Stats)
if !ok {
return nil, fmt.Errorf("bm25 preSearchData must be of type *search.BM25Stats")
}
}
}
}
}
_, contextScoreFusionKeyExists := ctx.Value(search.ScoreFusionKey).(bool)
if !contextScoreFusionKeyExists {
// if no score fusion, default behaviour
if !skipKNNCollector && requestHasKNN(req) {
knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
if err != nil {
return nil, err
}
}
} else {
// if score fusion, run collect if rescorer is defined
if rescorer != nil && requestHasKNN(req) {
knnHits, err = i.runKnnCollector(ctx, req, indexReader, false)
if err != nil {
return nil, err
}
}
}
if !skipSynonymCollector {
if synMap, ok := i.m.(mapping.SynonymMapping); ok && synMap.SynonymCount() > 0 {
if synReader, ok := indexReader.(index.ThesaurusReader); ok {
fts, err = query.ExtractSynonyms(ctx, synMap, synReader, req.Query, fts)
if err != nil {
return nil, err
}
}
}
}
// if score fusion, no faceting for knn hits is done
// hence we can skip setting the knn hits in the collector
if !contextScoreFusionKeyExists {
setKnnHitsInCollector(knnHits, coll)
}
if fts != nil {
if is, ok := indexReader.(*scorch.IndexSnapshot); ok {
is.UpdateSynonymSearchCount(1)
}
ctx = context.WithValue(ctx, search.FieldTermSynonymMapKey, fts)
}
// set the bm25Stats (stats important for consistent scoring) in
// the context object
if bm25Stats != nil {
ctx = context.WithValue(ctx, search.BM25StatsKey, bm25Stats)
}
searcher, err := req.Query.Searcher(ctx, indexReader, i.m, search.SearcherOptions{
Explain: req.Explain,
IncludeTermVectors: req.IncludeLocations || req.Highlight != nil,
Score: req.Score,
})
if err != nil {
return nil, err
}
defer func() {
if serr := searcher.Close(); err == nil && serr != nil {
err = serr
}
}()
// one facet builder per requested facet: numeric ranges take precedence,
// then date-time ranges, and anything else is treated as a terms facet
if req.Facets != nil {
facetsBuilder := search.NewFacetsBuilder(indexReader)
for facetName, facetRequest := range req.Facets {
if facetRequest.NumericRanges != nil {
// build numeric range facet
facetBuilder := facet.NewNumericFacetBuilder(facetRequest.Field, facetRequest.Size)
for _, nr := range facetRequest.NumericRanges {
facetBuilder.AddRange(nr.Name, nr.Min, nr.Max)
}
facetsBuilder.Add(facetName, facetBuilder)
} else if facetRequest.DateTimeRanges != nil {
// build date range facet
facetBuilder := facet.NewDateTimeFacetBuilder(facetRequest.Field, facetRequest.Size)
for _, dr := range facetRequest.DateTimeRanges {
dateTimeParserName := defaultDateTimeParser
if dr.DateTimeParser != "" {
dateTimeParserName = dr.DateTimeParser
}
dateTimeParser := i.m.DateTimeParserNamed(dateTimeParserName)
if dateTimeParser == nil {
return nil, fmt.Errorf("no date time parser named `%s` registered", dateTimeParserName)
}
start, end, err := dr.ParseDates(dateTimeParser)
if err != nil {
return nil, fmt.Errorf("ParseDates err: %v, using date time parser named %s", err, dateTimeParserName)
}
if start.IsZero() && end.IsZero() {
return nil, fmt.Errorf("date range query must specify either start, end or both for date range name '%s'", dr.Name)
}
facetBuilder.AddRange(dr.Name, start, end)
}
facetsBuilder.Add(facetName, facetBuilder)
} else {
// build terms facet
facetBuilder := facet.NewTermsFacetBuilder(facetRequest.Field, facetRequest.Size)
// Set prefix filter if provided
if facetRequest.TermPrefix != "" {
facetBuilder.SetPrefixFilter(facetRequest.TermPrefix)
}
// Set regex filter if provided
if facetRequest.TermPattern != "" {
// Use cached compiled pattern if available, otherwise compile it now
if facetRequest.compiledPattern != nil {
facetBuilder.SetRegexFilter(facetRequest.compiledPattern)
} else {
regex, err := regexp.Compile(facetRequest.TermPattern)
if err != nil {
return nil, fmt.Errorf("error compiling regex pattern for facet '%s': %v", facetName, err)
}
facetBuilder.SetRegexFilter(regex)
}
}
facetsBuilder.Add(facetName, facetBuilder)
}
}
coll.SetFacetsBuilder(facetsBuilder)
}
// give the registered start callback a chance to veto the search based on
// the estimated memory need; err is still nil here if no callback is set
memNeeded := memNeededForSearch(req, searcher, coll)
if cb := ctx.Value(SearchQueryStartCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryStartCallbackFn); ok {
err = cbF(memNeeded)
}
}
if err != nil {
return nil, err
}
if cb := ctx.Value(SearchQueryEndCallbackKey); cb != nil {
if cbF, ok := cb.(SearchQueryEndCallbackFn); ok {
defer func() {
_ = cbF(memNeeded)
}()
}
}
err = coll.Collect(ctx, searcher, indexReader)
if err != nil {
return nil, err
}
hits := coll.Results()
var highlighter highlight.Highlighter
if req.Highlight != nil {
// get the right highlighter
highlighter, err = Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
if err != nil {
return nil, err
}
if req.Highlight.Style != nil {
highlighter, err = Config.Cache.HighlighterNamed(*req.Highlight.Style)
if err != nil {
return nil, err
}
}
// NOTE(review): Style is dereferenced below without a nil check; if the
// default highlighter lookup can yield (nil, nil) while Style is nil this
// would panic - confirm against HighlighterNamed.
if highlighter == nil {
return nil, fmt.Errorf("no highlighter named `%s` registered", *req.Highlight.Style)
}
}
var storedFieldsCost uint64
for _, hit := range hits {
// KNN documents will already have their Index value set as part of the knn collector output
// so check if the index is empty and set it to the current index name
if i.name != "" && hit.Index == "" {
hit.Index = i.name
}
err, storedFieldsBytes := LoadAndHighlightAllFields(hit, req, i.name, indexReader, highlighter)
if err != nil {
return nil, err
}
storedFieldsCost += storedFieldsBytes
}
totalSearchCost += storedFieldsCost
search.RecordSearchCost(ctx, search.AddM, storedFieldsCost)
if req.PreSearchData == nil {
// increment the search count only if this is not a second-phase search
// (e.g., for Hybrid Search), since the first-phase search already increments it
atomic.AddUint64(&i.stats.searches, 1)
}
// increment the search time stat, as the first-phase search is part of
// the overall operation; adding second-phase time later keeps it accurate
searchDuration := time.Since(searchStart)
atomic.AddUint64(&i.stats.searchTime, uint64(searchDuration))
if Config.SlowSearchLogThreshold > 0 &&
searchDuration > Config.SlowSearchLogThreshold {
logger.Printf("slow search took %s - %v", searchDuration, req)
}
if reverseQueryExecution {
// reverse the sort back to the original
req.Sort.Reverse()
// resort using the original order
mhs := newSearchHitSorter(req.Sort, hits)
req.SortFunc()(mhs)
// reset request
req.SearchBefore = req.SearchAfter
req.SearchAfter = nil
}
rv := &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
},
Hits: hits,
Total: coll.Total(),
MaxScore: coll.MaxScore(),
Took: searchDuration,
Facets: coll.FacetResults(),
}
// rescore if fusion flag is set
if rescorer != nil {
rv.Hits, rv.Total, rv.MaxScore = rescorer.rescore(rv.Hits, knnHits)
rescorer.restoreSearchRequest()
rv.Hits = hitsInCurrentPage(req, rv.Hits)
}
if req.Explain {
rv.Request = req
}
return rv, nil
}
// LoadAndHighlightFields populates hit with the stored field values requested
// in req.Fields and, when a highlighter is supplied, with highlight fragments.
//
// Nothing is read unless fields are requested or a highlighter is given.
// Stored fields are loaded only when hit.Fields is still nil; "*" in
// req.Fields selects every stored field. When req.Highlight.Fields is nil,
// every field that has locations on the hit is highlighted.
//
// It returns ErrorIndexReadInconsistency when the hit's document cannot be
// found by the reader, and otherwise a nil error together with the stored
// field bytes reported by the document (zero when no fields were loaded).
func LoadAndHighlightFields(hit *search.DocumentMatch, req *SearchRequest,
	indexName string, r index.IndexReader,
	highlighter highlight.Highlighter,
) (error, uint64) {
	if len(req.Fields) == 0 && highlighter == nil {
		return nil, 0
	}

	doc, err := r.Document(hit.ID)
	if doc == nil {
		// unexpected case, a doc ID that was found as a search hit
		// was unable to be found during document lookup
		return ErrorIndexReadInconsistency, 0
	}
	if err != nil {
		// a document came back alongside an error: nothing is loaded
		return nil, 0
	}

	var totalStoredFieldsBytes uint64
	if len(req.Fields) > 0 && hit.Fields == nil {
		totalStoredFieldsBytes = doc.StoredFieldsBytes()
		for _, f := range deDuplicate(req.Fields) {
			doc.VisitFields(func(docF index.Field) {
				if f != "*" && docF.Name() != f {
					return
				}
				var value interface{}
				switch typed := docF.(type) {
				case index.TextField:
					value = typed.Text()
				case index.NumericField:
					if num, err := typed.Number(); err == nil {
						value = num
					}
				case index.DateTimeField:
					if datetime, layout, err := typed.DateTime(); err == nil {
						// the layout is either empty, the name of one of the
						// timestamp parsers, or an actual datetime layout
						switch layout {
						case "":
							// missing layout means we fallback to
							// the default layout which is RFC3339
							value = datetime.Format(time.RFC3339)
						case seconds.Name:
							value = strconv.FormatInt(datetime.Unix(), 10)
						case milliseconds.Name:
							value = strconv.FormatInt(datetime.UnixMilli(), 10)
						case microseconds.Name:
							value = strconv.FormatInt(datetime.UnixMicro(), 10)
						case nanoseconds.Name:
							value = strconv.FormatInt(datetime.UnixNano(), 10)
						default:
							// a layout supplied by a non-timestamp datetime
							// parser can be used directly for formatting
							value = datetime.Format(layout)
						}
					}
				case index.BooleanField:
					if boolean, err := typed.Boolean(); err == nil {
						value = boolean
					}
				case index.GeoPointField:
					if lon, err := typed.Lon(); err == nil {
						if lat, err := typed.Lat(); err == nil {
							value = []float64{lon, lat}
						}
					}
				case index.GeoShapeField:
					if shape, err := typed.GeoShape(); err == nil {
						value = shape
					}
				case index.IPField:
					if ip, err := typed.IP(); err == nil {
						value = ip.String()
					}
				}

				if value != nil {
					hit.AddFieldValue(docF.Name(), value)
				}
			})
		}
	}

	if highlighter != nil {
		targets := req.Highlight.Fields
		if targets == nil {
			// add all fields with matches
			targets = make([]string, 0, len(hit.Locations))
			for name := range hit.Locations {
				targets = append(targets, name)
			}
		}
		for _, name := range targets {
			highlighter.BestFragmentsInField(hit, doc, name, 1)
		}
	}

	return nil, totalStoredFieldsBytes
}
// NestedDocumentKey is the field name under which LoadAndHighlightAllFields
// stores the nested (descendant) documents on the root DocumentMatch.
const NestedDocumentKey = "_$nested"
// LoadAndHighlightAllFields loads stored fields and highlights for root and
// for each of its descendants. Descendants that yield any fields or fragments
// are gathered into a slice stored on root under NestedDocumentKey.
//
// It returns the first error encountered together with the stored field
// bytes accumulated up to that point, or nil and the overall total.
func LoadAndHighlightAllFields(
	root *search.DocumentMatch,
	req *SearchRequest,
	indexName string,
	r index.IndexReader,
	highlighter highlight.Highlighter,
) (error, uint64) {
	// the root document comes first
	rootErr, total := LoadAndHighlightFields(root, req, indexName, r, highlighter)
	if rootErr != nil {
		return rootErr, total
	}

	nested := make([]*search.NestedDocumentMatch, 0, len(root.Descendants))
	// a single scratch DocumentMatch is reused for every descendant so that
	// LoadAndHighlightFields can be applied to it
	scratch := new(search.DocumentMatch)
	for _, descID := range root.Descendants {
		extID, idErr := r.ExternalID(descID)
		if idErr != nil {
			return idErr, total
		}

		scratch.ID = extID
		scratch.IndexInternalID = descID
		scratch.Locations = root.Locations

		loadErr, n := LoadAndHighlightFields(scratch, req, indexName, r, highlighter)
		total += n
		if loadErr != nil {
			return loadErr, total
		}

		// keep the descendant only when something was loaded for it
		if len(scratch.Fields) != 0 || len(scratch.Fragments) != 0 {
			nested = append(nested, search.NewNestedDocumentMatch(scratch.Fields, scratch.Fragments))
		}
		// detach the loaded data before the scratch match is reused
		scratch.Fields = nil
		scratch.Fragments = nil
	}

	if len(nested) > 0 {
		root.AddFieldValue(NestedDocumentKey, nested)
	}
	return nil, total
}
// Fields returns the names of all fields reported by an index reader.
//
// It returns ErrorIndexClosed when the index is not open. A failure to close
// the reader is reported only when the lookup itself succeeded.
func (i *indexImpl) Fields() (fields []string, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		cerr := reader.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	if fields, err = reader.Fields(); err != nil {
		return nil, err
	}
	return fields, nil
}
// FieldDict returns a dictionary over the terms of field.
//
// On success the index read lock and the reader stay held by the returned
// dictionary; both are released when the caller closes it. On failure the
// lock is released here, and so is the reader if one had been obtained.
// It returns ErrorIndexClosed when the index is not open.
func (i *indexImpl) FieldDict(field string) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	indexReader, err := i.i.Reader()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := indexReader.FieldDict(field)
	if err != nil {
		// nobody else will close the reader on this path; the dictionary
		// error is the one worth reporting, so a close error is dropped
		_ = indexReader.Close()
		i.mutex.RUnlock()
		return nil, err
	}

	return &indexImplFieldDict{
		index:       i,
		indexReader: indexReader,
		fieldDict:   fieldDict,
	}, nil
}
// FieldDictRange returns a dictionary over the terms of field bounded by
// startTerm and endTerm, as interpreted by the underlying reader.
//
// On success the index read lock and the reader stay held by the returned
// dictionary; both are released when the caller closes it. On failure the
// lock is released here, and so is the reader if one had been obtained.
// It returns ErrorIndexClosed when the index is not open.
func (i *indexImpl) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	indexReader, err := i.i.Reader()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := indexReader.FieldDictRange(field, startTerm, endTerm)
	if err != nil {
		// nobody else will close the reader on this path; the dictionary
		// error is the one worth reporting, so a close error is dropped
		_ = indexReader.Close()
		i.mutex.RUnlock()
		return nil, err
	}

	return &indexImplFieldDict{
		index:       i,
		indexReader: indexReader,
		fieldDict:   fieldDict,
	}, nil
}
// FieldDictPrefix returns a dictionary over the terms of field selected by
// termPrefix, as interpreted by the underlying reader.
//
// On success the index read lock and the reader stay held by the returned
// dictionary; both are released when the caller closes it. On failure the
// lock is released here, and so is the reader if one had been obtained.
// It returns ErrorIndexClosed when the index is not open.
func (i *indexImpl) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
	i.mutex.RLock()

	if !i.open {
		i.mutex.RUnlock()
		return nil, ErrorIndexClosed
	}

	indexReader, err := i.i.Reader()
	if err != nil {
		i.mutex.RUnlock()
		return nil, err
	}

	fieldDict, err := indexReader.FieldDictPrefix(field, termPrefix)
	if err != nil {
		// nobody else will close the reader on this path; the dictionary
		// error is the one worth reporting, so a close error is dropped
		_ = indexReader.Close()
		i.mutex.RUnlock()
		return nil, err
	}

	return &indexImplFieldDict{
		index:       i,
		indexReader: indexReader,
		fieldDict:   fieldDict,
	}, nil
}
// Close takes the write lock, removes the index from indexStats, marks it as
// no longer open and closes the underlying index, returning that close error.
func (i *indexImpl) Close() error {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	indexStats.UnRegister(i)
	i.open = false

	closeErr := i.i.Close()
	return closeErr
}
// Stats returns the IndexStat attached to this index.
func (i *indexImpl) Stats() *IndexStat {
	stats := i.stats
	return stats
}
// StatsMap returns the map form of this index's statistics, as produced by
// the attached IndexStat.
func (i *indexImpl) StatsMap() map[string]interface{} {
	stats := i.stats
	return stats.statsMap()
}
// GetInternal reads the internal value stored under key through a fresh
// index reader.
//
// It returns ErrorIndexClosed when the index is not open. A failure to close
// the reader is reported only when the read itself succeeded.
func (i *indexImpl) GetInternal(key []byte) (val []byte, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	defer func() {
		cerr := reader.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	if val, err = reader.GetInternal(key); err != nil {
		return nil, err
	}
	return val, nil
}
// SetInternal stores val under key as an internal value of the underlying
// index. It returns ErrorIndexClosed when the index is not open.
func (i *indexImpl) SetInternal(key, val []byte) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if i.open {
		return i.i.SetInternal(key, val)
	}
	return ErrorIndexClosed
}
// DeleteInternal removes the internal value stored under key from the
// underlying index. It returns ErrorIndexClosed when the index is not open.
func (i *indexImpl) DeleteInternal(key []byte) error {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if i.open {
		return i.i.DeleteInternal(key)
	}
	return ErrorIndexClosed
}
// NewBatch returns an empty Batch bound to this index and backed by a fresh
// internal batch.
func (i *indexImpl) NewBatch() *Batch {
	b := new(Batch)
	b.index = i
	b.internal = index.NewBatch()
	return b
}
// Name returns the index name, which is the path it was created or opened
// with unless SetName changed it.
func (i *indexImpl) Name() string {
	current := i.name
	return current
}
// SetName renames the index. The index is unregistered from indexStats
// before the name changes and registered again afterwards.
// NOTE(review): i.name is written without taking i.mutex - confirm callers
// do not rename an index concurrently with other operations.
func (i *indexImpl) SetName(name string) {
indexStats.UnRegister(i)
i.name = name
indexStats.Register(i)
}
// indexImplFieldDict wraps a field dictionary together with the reader it
// came from and the owning index, whose read lock is released by Close.
type indexImplFieldDict struct {
// index is the owning index; its read lock is held until Close.
index *indexImpl
// indexReader is the reader the dictionary was obtained from.
indexReader index.IndexReader
// fieldDict is the wrapped dictionary all calls delegate to.
fieldDict index.FieldDict
}
// BytesRead reports the bytes-read figure of the wrapped dictionary.
func (f *indexImplFieldDict) BytesRead() uint64 {
	dict := f.fieldDict
	return dict.BytesRead()
}
// Next returns whatever the wrapped dictionary's Next returns.
func (f *indexImplFieldDict) Next() (*index.DictEntry, error) {
	dict := f.fieldDict
	return dict.Next()
}
// Close closes the wrapped dictionary and then the reader it came from, and
// finally releases the read lock held on the owning index.
//
// The reader is closed even when closing the dictionary fails, so it is not
// leaked. The dictionary's error takes precedence; the reader's error is
// returned only when the dictionary closed cleanly.
func (f *indexImplFieldDict) Close() error {
	defer f.index.mutex.RUnlock()

	dictErr := f.fieldDict.Close()
	readerErr := f.indexReader.Close()
	if dictErr != nil {
		return dictErr
	}
	return readerErr
}
// Cardinality reports the cardinality of the wrapped dictionary.
func (f *indexImplFieldDict) Cardinality() int {
	dict := f.fieldDict
	return dict.Cardinality()
}
// deDuplicate returns the entries of fields with repeats removed, keeping the
// first occurrence of each entry and the original relative order.
//
// An empty or nil input is returned unchanged; otherwise a new slice is
// returned and the input is left untouched.
func deDuplicate(fields []string) []string {
	if len(fields) == 0 {
		return fields
	}
	// the input length bounds both the set and the result, so size them once
	seen := make(map[string]struct{}, len(fields))
	ret := make([]string, 0, len(fields))
	for _, entry := range fields {
		if _, dup := seen[entry]; dup {
			continue
		}
		seen[entry] = struct{}{}
		ret = append(ret, entry)
	}
	return ret
}
// searchHitSorter orders a collection of document matches according to a
// search.SortOrder. It provides Len, Swap and Less methods over hits.
type searchHitSorter struct {
// hits is the collection being reordered in place by Swap.
hits search.DocumentMatchCollection
// sort is the order used by Less to compare two hits.
sort search.SortOrder
// cachedScoring holds the result of sort.CacheIsScore(), passed to Compare.
cachedScoring []bool
// cachedDesc holds the result of sort.CacheDescending(), passed to Compare.
cachedDesc []bool
}
// newSearchHitSorter builds a sorter for hits using the given sort order. The
// values of sort.CacheIsScore() and sort.CacheDescending() are computed once
// here and reused for every comparison.
func newSearchHitSorter(sort search.SortOrder, hits search.DocumentMatchCollection) *searchHitSorter {
	sorter := &searchHitSorter{hits: hits, sort: sort}
	sorter.cachedScoring = sort.CacheIsScore()
	sorter.cachedDesc = sort.CacheDescending()
	return sorter
}
// Len returns the number of hits being sorted.
func (m *searchHitSorter) Len() int {
	return len(m.hits)
}
// Swap exchanges the hits at positions i and j.
func (m *searchHitSorter) Swap(i, j int) {
	hits := m.hits
	hits[i], hits[j] = hits[j], hits[i]
}
// Less reports whether the hit at i sorts before the hit at j, i.e. whether
// the sort order's Compare yields a negative result for the pair.
func (m *searchHitSorter) Less(i, j int) bool {
	return m.sort.Compare(m.cachedScoring, m.cachedDesc, m.hits[i], m.hits[j]) < 0
}
// CopyTo copies the index into directory d: first the contents exposed by the
// wrapped index's copy reader, then the index metadata.
//
// It returns ErrorIndexClosed when the index is not open, and an error when
// the wrapped index does not implement index.CopyIndex or hands back a nil
// copy reader. A failure of the reader's CopyTo is wrapped with %w so callers
// can still inspect the underlying cause with errors.Is / errors.As. If the
// copy succeeds but closing the copy reader fails, the close error is
// returned.
func (i *indexImpl) CopyTo(d index.Directory) (err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return ErrorIndexClosed
	}

	copyIndex, ok := i.i.(index.CopyIndex)
	if !ok {
		return fmt.Errorf("index implementation does not support copy reader")
	}

	copyReader := copyIndex.CopyReader()
	if copyReader == nil {
		return fmt.Errorf("index's copyReader is nil")
	}
	// Surface a close failure only when nothing else went wrong.
	defer func() {
		if cerr := copyReader.CloseCopyReader(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	err = copyReader.CopyTo(d)
	if err != nil {
		return fmt.Errorf("error copying index metadata: %w", err)
	}

	// copy the metadata
	return i.meta.CopyTo(d)
}
// GetWriter returns a writer for filePath, interpreted relative to the
// directory f. Any missing parent directories are created first.
//
// The file is created if needed and truncated if it already exists, so that
// writing shorter content over an older, longer file cannot leave stale
// trailing bytes behind. On failure a nil writer is returned together with
// the error, never a non-nil interface wrapping a nil *os.File.
func (f FileSystemDirectory) GetWriter(filePath string) (io.WriteCloser,
	error,
) {
	dir, file := filepath.Split(filePath)
	if dir != "" {
		err := os.MkdirAll(filepath.Join(string(f), dir), os.ModePerm)
		if err != nil {
			return nil, err
		}
	}

	fh, err := os.OpenFile(filepath.Join(string(f), dir, file),
		os.O_RDWR|os.O_CREATE|os.O_TRUNC, 0o600)
	if err != nil {
		// Return an untyped nil so callers comparing the writer to nil work.
		return nil, err
	}
	return fh, nil
}
// FireIndexEvent asks the internal index implementation to fire its index
// event. It is a no-op when the internal index cannot be obtained or does not
// implement index.EventIndex.
func (i *indexImpl) FireIndexEvent() {
	// get the internal index implementation
	internalIndex, err := i.Advanced()
	if err != nil {
		return
	}

	// only implementations that support events can fire one
	eventIndex, supportsEvents := internalIndex.(index.EventIndex)
	if !supportsEvents {
		return
	}
	eventIndex.FireIndexEvent()
}
// -----------------------------------------------------------------------------
// TermFrequencies delegates to the TermFrequencies method of the index
// reader, passing field, limit and descending through unchanged.
//
// It returns ErrorIndexClosed when the index is not open, and an error when
// the reader does not implement index.IndexInsightsReader. The results are
// named so that the deferred close can report its error: if the query
// succeeds but closing the reader fails, the close error is returned.
func (i *indexImpl) TermFrequencies(field string, limit int, descending bool) (
	freqs []index.TermFreq, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	// err is the named result here, so this assignment reaches the caller.
	defer func() {
		if cerr := reader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	insightsReader, ok := reader.(index.IndexInsightsReader)
	if !ok {
		return nil, fmt.Errorf("index reader does not support TermFrequencies")
	}

	return insightsReader.TermFrequencies(field, limit, descending)
}
// CentroidCardinalities delegates to the CentroidCardinalities method of the
// index reader and stamps every returned entry with this index's name.
//
// It returns ErrorIndexClosed when the index is not open, and an error when
// the reader does not implement index.IndexInsightsReader. The results are
// named so that the deferred close can report its error: if the query
// succeeds but closing the reader fails, the close error is returned.
func (i *indexImpl) CentroidCardinalities(field string, limit int, descending bool) (
	cards []index.CentroidCardinality, err error) {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if !i.open {
		return nil, ErrorIndexClosed
	}

	reader, err := i.i.Reader()
	if err != nil {
		return nil, err
	}
	// err is the named result here, so this assignment reaches the caller.
	defer func() {
		if cerr := reader.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	insightsReader, ok := reader.(index.IndexInsightsReader)
	if !ok {
		return nil, fmt.Errorf("index reader does not support CentroidCardinalities")
	}

	cards, err = insightsReader.CentroidCardinalities(field, limit, descending)
	if err != nil {
		return nil, err
	}

	// Record which index each entry came from.
	for j := range cards {
		cards[j].Index = i.name
	}

	return cards, nil
}
// buildTopNCollector picks the TopN collector for a search request.
//
// A nested collector is used only when all of the following hold: the context
// carries search.NestedSearchKey set to true, the reader implements
// index.NestedReader, the index mapping implements mapping.NestedMapping, and
// the fields extracted from the query either include the ID or intersect a
// nested prefix. In every other case the regular collector is returned. Both
// variants use the "after" constructor when req.SearchAfter is set. An error
// is returned only when extracting the query's fields fails.
func (i *indexImpl) buildTopNCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader) (*collector.TopNCollector, error) {
	// regular builds the non-nested collector.
	regular := func() *collector.TopNCollector {
		if req.SearchAfter == nil {
			return collector.NewTopNCollector(req.Size, req.From, req.Sort)
		}
		return collector.NewTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter)
	}
	// nested builds the collector that works against a nested reader.
	nested := func(nr index.NestedReader) *collector.TopNCollector {
		if req.SearchAfter == nil {
			return collector.NewNestedTopNCollector(req.Size, req.From, req.Sort, nr)
		}
		return collector.NewNestedTopNCollectorAfter(req.Size, req.Sort, req.SearchAfter, nr)
	}

	// not in nested mode: nothing more to check
	if inNestedMode, ok := ctx.Value(search.NestedSearchKey).(bool); !ok || !inNestedMode {
		return regular(), nil
	}

	// the reader must be able to act as a nested reader
	nr, ok := reader.(index.NestedReader)
	if !ok {
		return regular(), nil
	}

	// the mapping must know about nested fields
	nm, ok := i.m.(mapping.NestedMapping)
	if !ok {
		return regular(), nil
	}

	// check whether the query touches the ID or any nested field
	var fs search.FieldSet
	fs, err := query.ExtractFields(req.Query, i.m, fs)
	if err != nil {
		return nil, err
	}
	if fs.HasID() || nm.IntersectsPrefix(fs) {
		return nested(nr), nil
	}

	return regular(), nil
}
================================================
FILE: index_meta.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"os"
"path/filepath"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
const metaFilename = "index_meta.json"
// indexMeta is the index metadata that is serialized as JSON into the
// index_meta.json file (see metaFilename) by Save and CopyTo and read back by
// openIndexMeta.
type indexMeta struct {
// Storage is stored under the "storage" JSON key.
Storage string `json:"storage"`
// IndexType is stored under "index_type"; openIndexMeta substitutes
// upsidedown.Name when the loaded value is empty.
IndexType string `json:"index_type"`
// Config is stored under "config" and omitted from the JSON when empty.
Config map[string]interface{} `json:"config,omitempty"`
}
// newIndexMeta returns an indexMeta populated with the given index type,
// storage and config.
func newIndexMeta(indexType string, storage string, config map[string]interface{}) *indexMeta {
	meta := new(indexMeta)
	meta.Storage = storage
	meta.IndexType = indexType
	meta.Config = config
	return meta
}
// openIndexMeta loads the metadata file found under path.
//
// It returns ErrorIndexPathDoesNotExist when path does not exist,
// ErrorIndexMetaMissing when the metadata file cannot be read and
// ErrorIndexMetaCorrupt when it cannot be parsed as JSON. An empty index type
// in the loaded metadata is replaced by upsidedown.Name.
func openIndexMeta(path string) (*indexMeta, error) {
	_, statErr := os.Stat(path)
	if os.IsNotExist(statErr) {
		return nil, ErrorIndexPathDoesNotExist
	}

	raw, err := os.ReadFile(indexMetaPath(path))
	if err != nil {
		return nil, ErrorIndexMetaMissing
	}

	meta := new(indexMeta)
	if err = util.UnmarshalJSON(raw, meta); err != nil {
		return nil, ErrorIndexMetaCorrupt
	}
	if meta.IndexType == "" {
		meta.IndexType = upsidedown.Name
	}
	return meta, nil
}
// Save writes the metadata as JSON into the metadata file under path,
// creating path (and any missing parents) first.
//
// The file is opened with O_EXCL, so saving over an existing metadata file
// fails with ErrorIndexPathExists. If the write succeeds but closing the file
// fails, the close error is returned.
func (i *indexMeta) Save(path string) (err error) {
	metaPath := indexMetaPath(path)

	// ensure any necessary parent directories exist
	if err = os.MkdirAll(path, 0700); err != nil {
		if os.IsExist(err) {
			return ErrorIndexPathExists
		}
		return err
	}

	metaBytes, err := util.MarshalJSON(i)
	if err != nil {
		return err
	}

	metaFile, err := os.OpenFile(metaPath, os.O_RDWR|os.O_CREATE|os.O_EXCL, 0666)
	if err != nil {
		if os.IsExist(err) {
			return ErrorIndexPathExists
		}
		return err
	}
	// Surface a close failure only when nothing else went wrong.
	defer func() {
		cerr := metaFile.Close()
		if cerr != nil && err == nil {
			err = cerr
		}
	}()

	_, err = metaFile.Write(metaBytes)
	return err
}
// CopyTo serializes the metadata as JSON and writes it to the metadata file
// (metaFilename) obtained from directory d.
//
// It returns an error when marshaling fails, when d does not supply a usable
// writer, or when the write fails. A failure to close the writer is also
// reported (when nothing else went wrong), since for a file-backed writer a
// failed close can mean the data never reached the file.
func (i *indexMeta) CopyTo(d index.Directory) (err error) {
	metaBytes, err := util.MarshalJSON(i)
	if err != nil {
		return err
	}

	w, err := d.GetWriter(metaFilename)
	if w == nil || err != nil {
		return fmt.Errorf("invalid writer for file: %s, err: %v",
			metaFilename, err)
	}
	// Same pattern as Save: keep the first error, but never drop a close error.
	defer func() {
		if cerr := w.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	_, err = w.Write(metaBytes)
	return err
}
// indexMetaPath returns the location of the metadata file inside the index
// directory at path.
func indexMetaPath(path string) string {
	metaPath := filepath.Join(path, metaFilename)
	return metaPath
}
================================================
FILE: index_meta_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"os"
"testing"
)
// TestIndexMeta exercises the metadata lifecycle: opening a missing metadata
// file fails, a saved file can be read back with its contents intact, and
// saving over an existing metadata file fails.
func TestIndexMeta(t *testing.T) {
	testIndexPath := "doesnotexit.bleve"
	defer func() {
		if err := os.RemoveAll(testIndexPath); err != nil {
			t.Fatal(err)
		}
	}()

	// open non-existent meta should give an error
	if _, err := openIndexMeta(testIndexPath); err == nil {
		t.Errorf("expected error, got nil")
	}

	// create meta
	im := &indexMeta{Storage: "boltdb"}
	if err := im.Save(testIndexPath); err != nil {
		t.Error(err)
	}

	// open a meta that exists; stop here on failure because the returned
	// pointer is nil and is dereferenced below
	reopened, err := openIndexMeta(testIndexPath)
	if err != nil {
		t.Fatal(err)
	}
	if reopened.Storage != "boltdb" {
		t.Errorf("expected storage 'boltdb', got '%s'", reopened.Storage)
	}

	// save a meta that already exists
	if err := reopened.Save(testIndexPath); err == nil {
		t.Errorf("expected error, got nil")
	}
}
================================================
FILE: index_stats.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"encoding/json"
"sync"
"sync/atomic"
)
// IndexStat holds the statistics reported for a single index. The two
// counters are read with atomic loads in statsMap.
type IndexStat struct {
// searches is reported under the "searches" key.
searches uint64
// searchTime is reported under the "search_time" key.
searchTime uint64
// i is the index whose wrapped implementation supplies the "index" stats.
i *indexImpl
}
// statsMap returns the statistics as a map with three keys: "index" (the
// stats map of the wrapped index implementation), "searches" and
// "search_time" (both read atomically).
func (is *IndexStat) statsMap() map[string]interface{} {
	return map[string]interface{}{
		"index":       is.i.i.StatsMap(),
		"searches":    atomic.LoadUint64(&is.searches),
		"search_time": atomic.LoadUint64(&is.searchTime),
	}
}
// MarshalJSON encodes the statistics map produced by statsMap as JSON.
func (is *IndexStat) MarshalJSON() ([]byte, error) {
	return json.Marshal(is.statsMap())
}
// IndexStats is a registry of per-index statistics keyed by index name.
type IndexStats struct {
// indexes maps an index name to its IndexStat.
indexes map[string]*IndexStat
// mutex guards indexes; Register/UnRegister take the write lock and
// String takes the read lock.
mutex sync.RWMutex
}
// NewIndexStats returns an empty, ready-to-use registry.
func NewIndexStats() *IndexStats {
	registry := new(IndexStats)
	registry.indexes = make(map[string]*IndexStat)
	return registry
}
// Register stores the index's stats under the index's current name,
// replacing any entry already registered under that name.
func (i *IndexStats) Register(index Index) {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	name, stat := index.Name(), index.Stats()
	i.indexes[name] = stat
}
// UnRegister removes the entry registered under the index's current name, if
// there is one.
func (i *IndexStats) UnRegister(index Index) {
	i.mutex.Lock()
	defer i.mutex.Unlock()

	name := index.Name()
	delete(i.indexes, name)
}
// String renders all registered stats as a JSON object keyed by index name.
// If encoding fails it returns the fixed text "error marshaling stats".
func (i *IndexStats) String() string {
	i.mutex.RLock()
	defer i.mutex.RUnlock()

	if encoded, err := json.Marshal(i.indexes); err == nil {
		return string(encoded)
	}
	return "error marshaling stats"
}
// indexStats is the package-level stats registry that indexes register with
// and unregister from. It is only declared here.
// NOTE(review): presumably assigned during package initialization elsewhere — confirm.
var indexStats *IndexStats
================================================
FILE: index_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"math"
"os"
"path/filepath"
"reflect"
"sort"
"strconv"
"strings"
"sync"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/null"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
)
// Fatalfable is the minimal reporting interface needed by the temp-dir
// helpers below; the tests in this file pass a *testing.T for it.
type Fatalfable interface {
// Fatalf reports a formatted failure.
Fatalf(format string, args ...interface{})
}
// createTmpIndexPath creates a fresh temporary directory (prefix
// "bleve-testidx") and returns its path, reporting a failure through f.
func createTmpIndexPath(f Fatalfable) string {
	dir, mkErr := os.MkdirTemp("", "bleve-testidx")
	if mkErr != nil {
		f.Fatalf("error creating temp dir: %v", mkErr)
	}
	return dir
}
// cleanupTmpIndexPath removes path and everything beneath it, reporting a
// failure through f.
func cleanupTmpIndexPath(f Fatalfable, path string) {
	if rmErr := os.RemoveAll(path); rmErr != nil {
		f.Fatalf("error removing temp dir: %v", rmErr)
	}
}
// TestCrud walks an index through create/update/delete operations on both
// documents and internal key/value pairs, directly and through a batch, then
// closes and reopens the index to check that the resulting state (document
// count, a stored document and the field list) is still there.
func TestCrud(t *testing.T) {
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
idx, err := New(tmpIndexPath, NewIndexMapping())
if err != nil {
t.Fatal(err)
}
// index document "a"; it is never deleted and is looked up after reopening
doca := map[string]interface{}{
"name": "marty",
"desc": "gophercon india",
}
err = idx.Index("a", doca)
if err != nil {
t.Error(err)
}
// index document "y" and delete it again right away
docy := map[string]interface{}{
"name": "jasper",
"desc": "clojure",
}
err = idx.Index("y", docy)
if err != nil {
t.Error(err)
}
err = idx.Delete("y")
if err != nil {
t.Error(err)
}
// index document "x"; the batch below deletes it
docx := map[string]interface{}{
"name": "rose",
"desc": "googler",
}
err = idx.Index("x", docx)
if err != nil {
t.Error(err)
}
// set an internal key that the batch below deletes
err = idx.SetInternal([]byte("status"), []byte("pending"))
if err != nil {
t.Error(err)
}
// one batch that indexes "b", deletes "x", sets internal key "batchi"
// and deletes internal key "status"
docb := map[string]interface{}{
"name": "steve",
"desc": "cbft master",
}
batch := idx.NewBatch()
err = batch.Index("b", docb)
if err != nil {
t.Error(err)
}
batch.Delete("x")
batch.SetInternal([]byte("batchi"), []byte("batchv"))
batch.DeleteInternal([]byte("status"))
err = idx.Batch(batch)
if err != nil {
t.Error(err)
}
// the internal key set by the batch must be readable
val, err := idx.GetInternal([]byte("batchi"))
if err != nil {
t.Error(err)
}
if string(val) != "batchv" {
t.Errorf("expected 'batchv', got '%s'", val)
}
// the internal key deleted by the batch must be gone
val, err = idx.GetInternal([]byte("status"))
if err != nil {
t.Error(err)
}
if val != nil {
t.Errorf("expected nil, got '%s'", val)
}
// set and delete internal keys directly on the index
err = idx.SetInternal([]byte("seqno"), []byte("7"))
if err != nil {
t.Error(err)
}
err = idx.SetInternal([]byte("status"), []byte("ready"))
if err != nil {
t.Error(err)
}
err = idx.DeleteInternal([]byte("status"))
if err != nil {
t.Error(err)
}
val, err = idx.GetInternal([]byte("status"))
if err != nil {
t.Error(err)
}
if val != nil {
t.Errorf("expected nil, got '%s'", val)
}
val, err = idx.GetInternal([]byte("seqno"))
if err != nil {
t.Error(err)
}
if string(val) != "7" {
t.Errorf("expected '7', got '%s'", val)
}
// close the index, open it again, and try some more things
err = idx.Close()
if err != nil {
t.Fatal(err)
}
idx, err = Open(tmpIndexPath)
if err != nil {
t.Fatal(err)
}
defer func() {
err := idx.Close()
if err != nil {
t.Fatal(err)
}
}()
// only "a" and "b" remain: "y" and "x" were deleted above
count, err := idx.DocCount()
if err != nil {
t.Fatal(err)
}
if count != 2 {
t.Errorf("expected doc count 2, got %d", count)
}
doc, err := idx.Document("a")
if err != nil {
t.Fatal(err)
}
foundNameField := false
doc.VisitFields(func(field index.Field) {
if field.Name() == "name" && string(field.Value()) == "marty" {
foundNameField = true
}
})
if !foundNameField {
t.Errorf("expected to find field named 'name' with value 'marty'")
}
fields, err := idx.Fields()
if err != nil {
t.Fatal(err)
}
// every expected field must be reported; extra fields are tolerated
expectedFields := map[string]bool{
"_all": false,
"name": false,
"desc": false,
}
if len(fields) < len(expectedFields) {
t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
}
for _, f := range fields {
expectedFields[f] = true
}
for ef, efp := range expectedFields {
if !efp {
t.Errorf("field %s is missing", ef)
}
}
}
// approxSame reports whether actual lies within 30% of expected, i.e. whether
// |actual-expected| / expected is strictly below 0.30.
//
// When expected is zero the relative difference is undefined (the division
// would produce NaN or +Inf, both of which compare as "not close"), so the
// values are considered the same only when actual is zero as well.
func approxSame(actual, expected uint64) bool {
	if expected == 0 {
		return actual == 0
	}

	// Absolute difference without underflowing the unsigned subtraction.
	diff := actual - expected
	if expected > actual {
		diff = expected - actual
	}
	return float64(diff)/float64(expected) < 0.30
}
// checkStatsOnIndexedBatch creates an index at indexPath with the given
// mapping, indexes the batch built from "sample-data.json" and, from the
// batch's persisted callback, compares the "num_bytes_written_at_index_time"
// stat against expectedVal using approxSame.
//
// It returns an error when the index cannot be created, the batch cannot be
// built or indexed, or the stat is not close to expectedVal. The index is
// closed on every path once it has been created, and a close failure is
// reported when nothing else went wrong.
func checkStatsOnIndexedBatch(indexPath string, indexMapping mapping.IndexMapping,
	expectedVal uint64,
) (err error) {
	var wg sync.WaitGroup
	var statValError error

	idx, err := NewUsing(indexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		return err
	}
	// Close the index on the error paths too, and do not drop its error.
	defer func() {
		if cerr := idx.Close(); err == nil && cerr != nil {
			err = cerr
		}
	}()

	batch, err := getBatchFromData(idx, "sample-data.json")
	if err != nil {
		return fmt.Errorf("failed to form a batch %v\n", err)
	}

	wg.Add(1)
	batch.SetPersistedCallback(func(e error) {
		defer wg.Done()
		stats, _ := idx.StatsMap()["index"].(map[string]interface{})
		bytesWritten, _ := stats["num_bytes_written_at_index_time"].(uint64)
		if !approxSame(bytesWritten, expectedVal) {
			statValError = fmt.Errorf("expected bytes written is %d, got %v", expectedVal,
				bytesWritten)
		}
	})

	err = idx.Batch(batch)
	if err != nil {
		return fmt.Errorf("failed to index batch %v\n", err)
	}

	// statValError is written by the callback; wait for it before reading.
	wg.Wait()
	return statValError
}
func TestBytesWritten(t *testing.T) {
tmpIndexPath := createTmpIndexPath(t)
indexMapping := NewIndexMapping()
indexMapping.TypeField = "type"
indexMapping.DefaultAnalyzer = "en"
documentMapping := NewDocumentMapping()
indexMapping.AddDocumentMapping("hotel", documentMapping)
indexMapping.DocValuesDynamic = false
indexMapping.StoreDynamic = false
contentFieldMapping := NewTextFieldMapping()
contentFieldMapping.Index = true
contentFieldMapping.Store = false
contentFieldMapping.IncludeInAll = false
contentFieldMapping.IncludeTermVectors = false
contentFieldMapping.DocValues = false
reviewsMapping := NewDocumentMapping()
reviewsMapping.AddFieldMappingsAt("content", contentFieldMapping)
documentMapping.AddSubDocumentMapping("reviews", reviewsMapping)
typeFieldMapping := NewTextFieldMapping()
typeFieldMapping.Store = false
typeFieldMapping.IncludeInAll = false
typeFieldMapping.IncludeTermVectors = false
typeFieldMapping.DocValues = false
documentMapping.AddFieldMappingsAt("type", typeFieldMapping)
err = checkStatsOnIndexedBatch(tmpIndexPath, indexMapping, 57273)
if err != nil {
t.Fatal(err)
}
cleanupTmpIndexPath(t, tmpIndexPath)
contentFieldMapping.Store = true
tmpIndexPath1 := createTmpIndexPath(t)
err := checkStatsOnIndexedBatch(tmpIndexPath1, indexMapping, 76069)
if err != nil {
t.Fatal(err)
}
cleanupTmpIndexPath(t, tmpIndexPath1)
contentFieldMapping.Store = false
contentFieldMapping.IncludeInAll = true
tmpIndexPath2 := createTmpIndexPath(t)
err = checkStatsOnIndexedBatch(tmpIndexPath2, indexMapping, 68875)
if err != nil {
t.Fatal(err)
}
cleanupTmpIndexPath(t, tmpIndexPath2)
contentFieldMapping.IncludeInAll = false
contentFieldMapping.IncludeTermVectors = true
tmpIndexPath3 := createTmpIndexPath(t)
err = checkStatsOnIndexedBatch(tmpIndexPath3, indexMapping, 78985)
if err != nil {
t.Fatal(err)
}
cleanupTmpIndexPath(t, tmpIndexPath3)
contentFieldMapping.IncludeTermVectors = false
contentFieldMapping.DocValues = true
tmpIndexPath4 := createTmpIndexPath(t)
err = checkStatsOnIndexedBatch(tmpIndexPath4, indexMapping, 64228)
if err != nil {
t.Fatal(err)
}
cleanupTmpIndexPath(t, tmpIndexPath4)
}
// createIndexMappingOnSampleData builds the index mapping shared by the
// scoring tests: type field "type", default analyzer "en", the default
// scoring model, dynamic storing and doc values switched off, and a "hotel"
// document mapping with a non-stored "type" text field and a non-stored
// "reviews.content" text field.
func createIndexMappingOnSampleData() *mapping.IndexMappingImpl {
	im := NewIndexMapping()
	im.TypeField = "type"
	im.DefaultAnalyzer = "en"
	im.ScoringModel = index.DefaultScoringModel

	hotel := NewDocumentMapping()
	im.AddDocumentMapping("hotel", hotel)
	im.StoreDynamic = false
	im.DocValuesDynamic = false

	// reviews.content: text, not stored
	content := NewTextFieldMapping()
	content.Store = false
	reviews := NewDocumentMapping()
	reviews.AddFieldMappingsAt("content", content)
	hotel.AddSubDocumentMapping("reviews", reviews)

	// type: text, not stored
	typeField := NewTextFieldMapping()
	typeField.Store = false
	hotel.AddFieldMappingsAt("type", typeField)

	return im
}
// TestBM25TFIDFScoring indexes the sample data into two indexes, one using
// BM25 scoring and one using the default scoring model, runs the same match
// query against both and compares the scores hit by hit.
//
// A failed search aborts the test immediately, because the nil result would
// otherwise be dereferenced, and the hit counts are checked before indexing
// into the BM25 result.
func TestBM25TFIDFScoring(t *testing.T) {
	tmpIndexPath1 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath1)
	tmpIndexPath2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath2)

	indexMapping := createIndexMappingOnSampleData()
	indexMapping.ScoringModel = index.BM25Scoring
	indexBM25, err := NewUsing(tmpIndexPath1, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}

	indexMapping1 := createIndexMappingOnSampleData()
	indexTFIDF, err := NewUsing(tmpIndexPath2, indexMapping1, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}

	defer func() {
		err := indexBM25.Close()
		if err != nil {
			t.Fatal(err)
		}

		err = indexTFIDF.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch, err := getBatchFromData(indexBM25, "sample-data.json")
	if err != nil {
		t.Fatalf("failed to form a batch")
	}
	err = indexBM25.Batch(batch)
	if err != nil {
		t.Fatalf("failed to index batch %v\n", err)
	}
	query := NewMatchQuery("Hotel")
	query.FieldVal = "name"
	searchRequest := NewSearchRequestOptions(query, int(10), 0, true)

	resBM25, err := indexBM25.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	batch, err = getBatchFromData(indexTFIDF, "sample-data.json")
	if err != nil {
		t.Fatalf("failed to form a batch")
	}
	err = indexTFIDF.Batch(batch)
	if err != nil {
		t.Fatalf("failed to index batch %v\n", err)
	}

	resTFIDF, err := indexTFIDF.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	// Every TF-IDF hit is compared with the BM25 hit at the same position.
	if len(resBM25.Hits) < len(resTFIDF.Hits) {
		t.Fatalf("expected at least %d BM25 hits, got %d",
			len(resTFIDF.Hits), len(resBM25.Hits))
	}
	for i, hit := range resTFIDF.Hits {
		if hit.Score < resBM25.Hits[i].Score {
			t.Fatalf("expected the score to be higher for BM25, got %v and %v",
				resBM25.Hits[i].Score, hit.Score)
		}
	}
}
// TestBM25GlobalScoring checks that BM25 scores computed over an alias of two
// partitions, each holding half of the sample data, match the scores of a
// single index holding all of it, when the search runs with the
// search.GlobalScoring search type.
//
// Failures that would leave a nil or short result behind (loading the
// dataset, running a search) abort the test instead of letting a later
// dereference or index expression panic.
func TestBM25GlobalScoring(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	indexMapping := createIndexMappingOnSampleData()
	indexMapping.ScoringModel = index.BM25Scoring
	idxSinglePartition, err := NewUsing(tmpIndexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}

	defer func() {
		err := idxSinglePartition.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch, err := getBatchFromData(idxSinglePartition, "sample-data.json")
	if err != nil {
		t.Fatalf("failed to form a batch")
	}
	err = idxSinglePartition.Batch(batch)
	if err != nil {
		t.Fatalf("failed to index batch %v\n", err)
	}
	query := NewMatchQuery("Hotel")
	query.FieldVal = "name"
	searchRequest := NewSearchRequestOptions(query, int(10), 0, true)

	res, err := idxSinglePartition.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	singlePartHits := res.Hits

	dataset, err := readDataFromFile("sample-data.json")
	if err != nil {
		t.Fatalf("failed to read the dataset: %v", err)
	}

	// first partition: first half of the dataset
	tmpIndexPath1 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath1)

	idxPart1, err := NewUsing(tmpIndexPath1, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}

	defer func() {
		err := idxPart1.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch1 := idxPart1.NewBatch()
	for _, doc := range dataset[:len(dataset)/2] {
		err = batch1.Index(fmt.Sprintf("%d", doc["id"]), doc)
		if err != nil {
			t.Fatal(err)
		}
	}

	err = idxPart1.Batch(batch1)
	if err != nil {
		t.Fatal(err)
	}

	// second partition: second half of the dataset
	tmpIndexPath2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath2)

	idxPart2, err := NewUsing(tmpIndexPath2, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}

	defer func() {
		err := idxPart2.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch2 := idxPart2.NewBatch()
	for _, doc := range dataset[len(dataset)/2:] {
		err = batch2.Index(fmt.Sprintf("%d", doc["id"]), doc)
		if err != nil {
			t.Fatal(err)
		}
	}

	err = idxPart2.Batch(batch2)
	if err != nil {
		t.Fatal(err)
	}

	multiPartIndex := NewIndexAlias(idxPart1, idxPart2)
	err = multiPartIndex.SetIndexMapping(indexMapping)
	if err != nil {
		t.Fatal(err)
	}

	ctx := context.Background()
	// this key is set to ensure that we have a consistent scoring at the index alias
	// level (it forces a pre search phase which can have a small overhead)
	ctx = context.WithValue(ctx, search.SearchTypeKey, search.GlobalScoring)

	res, err = multiPartIndex.SearchInContext(ctx, searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	// Every alias hit is compared with the single-partition hit at the same
	// position.
	if len(singlePartHits) < len(res.Hits) {
		t.Fatalf("expected at least %d single partition hits, got %d",
			len(res.Hits), len(singlePartHits))
	}
	for i, hit := range res.Hits {
		if hit.Score != singlePartHits[i].Score {
			t.Fatalf("expected the scores to be the same, got %v and %v",
				hit.Score, singlePartHits[i].Score)
		}
	}
}
// TestBytesRead indexes the sample data and then runs a series of queries
// (query string, repeated query string, fuzzy, faceted, numeric range,
// highlighted and disjunction), checking after each one how much the
// "num_bytes_read_at_query_time" stat grew and how that relates to the cost
// reported on the search result.
//
// A failed search aborts the test immediately, because the nil result would
// otherwise be dereferenced by the check that follows it.
func TestBytesRead(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	indexMapping := NewIndexMapping()
	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"
	documentMapping := NewDocumentMapping()
	indexMapping.AddDocumentMapping("hotel", documentMapping)
	indexMapping.StoreDynamic = false
	indexMapping.DocValuesDynamic = false
	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Store = false

	reviewsMapping := NewDocumentMapping()
	reviewsMapping.AddFieldMappingsAt("content", contentFieldMapping)
	documentMapping.AddSubDocumentMapping("reviews", reviewsMapping)

	typeFieldMapping := NewTextFieldMapping()
	typeFieldMapping.Store = false
	documentMapping.AddFieldMappingsAt("type", typeFieldMapping)

	idx, err := NewUsing(tmpIndexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}

	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// bytesReadStat returns the index's current bytes-read-at-query-time
	// stat, or 0 when the stat is missing or has an unexpected type.
	bytesReadStat := func() uint64 {
		stats, _ := idx.StatsMap()["index"].(map[string]interface{})
		n, _ := stats["num_bytes_read_at_query_time"].(uint64)
		return n
	}

	batch, err := getBatchFromData(idx, "sample-data.json")
	if err != nil {
		t.Fatalf("failed to form a batch")
	}
	err = idx.Batch(batch)
	if err != nil {
		t.Fatalf("failed to index batch %v\n", err)
	}
	query := NewQueryStringQuery("united")
	searchRequest := NewSearchRequestOptions(query, int(10), 0, true)

	res, err := idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	prevBytesRead := bytesReadStat()

	expectedBytesRead := uint64(21164)
	if supportForVectorSearch {
		expectedBytesRead = 21574
	}

	if prevBytesRead != expectedBytesRead && res.Cost == prevBytesRead {
		t.Fatalf("expected bytes read for query string %v, got %v",
			expectedBytesRead, prevBytesRead)
	}

	// subsequent queries on the same field results in lesser amount
	// of bytes read because the segment static and dictionary is reused and not
	// loaded from mmap'd file
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	bytesRead := bytesReadStat()
	if bytesRead-prevBytesRead != 66 && res.Cost == bytesRead-prevBytesRead {
		t.Fatalf("expected bytes read for query string 66, got %v",
			bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	fuzz := NewFuzzyQuery("hotel")
	fuzz.FieldVal = "reviews.content"
	fuzz.Fuzziness = 2
	searchRequest = NewSearchRequest(fuzz)
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	bytesRead = bytesReadStat()
	if bytesRead-prevBytesRead != 8468 && res.Cost == bytesRead-prevBytesRead {
		t.Fatalf("expected bytes read for fuzzy query is 8468, got %v",
			bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	typeFacet := NewFacetRequest("type", 2)
	query = NewQueryStringQuery("united")
	searchRequest = NewSearchRequestOptions(query, int(0), 0, true)
	searchRequest.AddFacet("types", typeFacet)
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead = bytesReadStat()
	if !approxSame(bytesRead-prevBytesRead, 196) && res.Cost == bytesRead-prevBytesRead {
		t.Fatalf("expected bytes read for faceted query is around 196, got %v",
			bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	// named so as not to shadow the min/max builtins
	minID := float64(8660)
	maxID := float64(8665)
	numRangeQuery := NewNumericRangeQuery(&minID, &maxID)
	numRangeQuery.FieldVal = "id"
	searchRequest = NewSearchRequestOptions(numRangeQuery, int(10), 0, true)
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead = bytesReadStat()
	if bytesRead-prevBytesRead != 924 && res.Cost == bytesRead-prevBytesRead {
		t.Fatalf("expected bytes read for numeric range query is 924, got %v",
			bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	searchRequest = NewSearchRequestOptions(query, int(10), 0, true)
	searchRequest.Highlight = &HighlightRequest{}
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead = bytesReadStat()
	if bytesRead-prevBytesRead != 105 && res.Cost == bytesRead-prevBytesRead {
		t.Fatalf("expected bytes read for query with highlighter is 105, got %v",
			bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	disQuery := NewDisjunctionQuery(NewMatchQuery("hotel"), NewMatchQuery("united"))
	searchRequest = NewSearchRequestOptions(disQuery, int(10), 0, true)
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	// expectation is that the bytes read is roughly equal to sum of sub queries in
	// the disjunction query plus the segment loading portion for the second subquery
	// since it's created afresh and not reused
	bytesRead = bytesReadStat()
	if bytesRead-prevBytesRead != 120 && res.Cost == bytesRead-prevBytesRead {
		t.Fatalf("expected bytes read for disjunction query is 120, got %v",
			bytesRead-prevBytesRead)
	}
}
// TestBytesReadStored checks how the "num_bytes_read_at_query_time" stat
// grows for term queries when a stored field is involved: first with
// "reviews.content" stored, then, on a second index, with "type" stored. For
// each index it looks at a first query, a repeated query and a query that
// also loads all stored fields.
//
// A failed search aborts the test immediately, because the nil result would
// otherwise be dereferenced by the check that follows it, and a failure to
// close the first index is reported instead of being dropped.
func TestBytesReadStored(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// bytesReadStat extracts the bytes-read-at-query-time stat from an
	// index's stats map, or 0 when it is missing or has an unexpected type.
	bytesReadStat := func(statsMap map[string]interface{}) uint64 {
		stats, _ := statsMap["index"].(map[string]interface{})
		n, _ := stats["num_bytes_read_at_query_time"].(uint64)
		return n
	}

	indexMapping := NewIndexMapping()
	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"
	documentMapping := NewDocumentMapping()
	indexMapping.AddDocumentMapping("hotel", documentMapping)

	indexMapping.DocValuesDynamic = false
	indexMapping.StoreDynamic = false

	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Store = true
	contentFieldMapping.IncludeInAll = false
	contentFieldMapping.IncludeTermVectors = false

	reviewsMapping := NewDocumentMapping()
	reviewsMapping.AddFieldMappingsAt("content", contentFieldMapping)
	documentMapping.AddSubDocumentMapping("reviews", reviewsMapping)

	typeFieldMapping := NewTextFieldMapping()
	typeFieldMapping.Store = false
	typeFieldMapping.IncludeInAll = false
	typeFieldMapping.IncludeTermVectors = false
	documentMapping.AddFieldMappingsAt("type", typeFieldMapping)

	idx, err := NewUsing(tmpIndexPath, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}
	batch, err := getBatchFromData(idx, "sample-data.json")
	if err != nil {
		t.Fatalf("failed to form a batch %v\n", err)
	}
	err = idx.Batch(batch)
	if err != nil {
		t.Fatalf("failed to index batch %v\n", err)
	}
	query := NewTermQuery("hotel")
	query.FieldVal = "reviews.content"
	searchRequest := NewSearchRequestOptions(query, int(10), 0, true)
	res, err := idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead := bytesReadStat(idx.StatsMap())

	expectedBytesRead := uint64(11025)
	if supportForVectorSearch {
		expectedBytesRead = 11435
	}

	if bytesRead != expectedBytesRead && bytesRead == res.Cost {
		t.Fatalf("expected the bytes read stat to be around %v, got %v", expectedBytesRead, bytesRead)
	}
	prevBytesRead := bytesRead

	searchRequest = NewSearchRequestOptions(query, int(10), 0, true)
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	bytesRead = bytesReadStat(idx.StatsMap())
	if bytesRead-prevBytesRead != 48 && bytesRead-prevBytesRead == res.Cost {
		t.Fatalf("expected the bytes read stat to be around 48, got %v", bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	searchRequest = NewSearchRequestOptions(query, int(10), 0, true)
	searchRequest.Fields = []string{"*"}
	res, err = idx.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead = bytesReadStat(idx.StatsMap())

	if bytesRead-prevBytesRead != 26511 && bytesRead-prevBytesRead == res.Cost {
		t.Fatalf("expected the bytes read stat to be around 26511, got %v",
			bytesRead-prevBytesRead)
	}
	if err := idx.Close(); err != nil {
		t.Fatal(err)
	}
	cleanupTmpIndexPath(t, tmpIndexPath)

	// same type of querying but on field "type"
	contentFieldMapping.Store = false
	typeFieldMapping.Store = true

	tmpIndexPath1 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath1)

	idx1, err := NewUsing(tmpIndexPath1, indexMapping, Config.DefaultIndexType, Config.DefaultMemKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := idx1.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch, err = getBatchFromData(idx1, "sample-data.json")
	if err != nil {
		t.Fatalf("failed to form a batch %v\n", err)
	}
	err = idx1.Batch(batch)
	if err != nil {
		t.Fatalf("failed to index batch %v\n", err)
	}

	query = NewTermQuery("hotel")
	query.FieldVal = "type"
	searchRequest = NewSearchRequestOptions(query, int(10), 0, true)
	res, err = idx1.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead = bytesReadStat(idx1.StatsMap())

	expectedBytesRead = uint64(3212)
	if supportForVectorSearch {
		expectedBytesRead = 3622
	}

	if bytesRead != expectedBytesRead && bytesRead == res.Cost {
		t.Fatalf("expected the bytes read stat to be around %v, got %v", expectedBytesRead, bytesRead)
	}
	prevBytesRead = bytesRead

	res, err = idx1.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	bytesRead = bytesReadStat(idx1.StatsMap())
	if bytesRead-prevBytesRead != 47 && bytesRead-prevBytesRead == res.Cost {
		t.Fatalf("expected the bytes read stat to be around 47, got %v", bytesRead-prevBytesRead)
	}
	prevBytesRead = bytesRead

	searchRequest.Fields = []string{"*"}
	res, err = idx1.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}

	bytesRead = bytesReadStat(idx1.StatsMap())
	if bytesRead-prevBytesRead != 77 && bytesRead-prevBytesRead == res.Cost {
		t.Fatalf("expected the bytes read stat to be around 77, got %v", bytesRead-prevBytesRead)
	}
}
// readDataFromFile decodes the JSON stored at
// <working directory>/data/test/<fileName> into a slice of generic objects.
//
// It returns a nil slice together with the underlying error when the working
// directory cannot be determined, the file cannot be read, or its content
// cannot be unmarshalled into []map[string]interface{}.
func readDataFromFile(fileName string) ([]map[string]interface{}, error) {
	workDir, err := os.Getwd()
	if err != nil {
		return nil, err
	}

	raw, err := os.ReadFile(filepath.Join(workDir, "data", "test", fileName))
	if err != nil {
		return nil, err
	}

	var docs []map[string]interface{}
	if err := json.Unmarshal(raw, &docs); err != nil {
		return nil, err
	}
	return docs, nil
}
// getBatchFromData loads the dataset named fileName through readDataFromFile
// and returns a new batch on idx that indexes every document of the dataset.
// Each document is keyed by its "id" entry formatted with "%d".
//
// If the dataset cannot be read, or a document cannot be added to the batch,
// a nil batch and the error are returned.
func getBatchFromData(idx Index, fileName string) (*Batch, error) {
	dataset, err := readDataFromFile(fileName)
	if err != nil {
		// Fail fast: previously the read error was only surfaced through the
		// final return, alongside a non-nil (empty) batch.
		return nil, err
	}

	batch := idx.NewBatch()
	for _, doc := range dataset {
		if err := batch.Index(fmt.Sprintf("%d", doc["id"]), doc); err != nil {
			return nil, err
		}
	}
	return batch, nil
}
// TestIndexCreateNewOverExisting verifies that New refuses to create an index
// at a path where an index was already created, reporting
// ErrorIndexPathExists.
func TestIndexCreateNewOverExisting(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	// Clean up the on-disk index like the other tests in this file do;
	// without this the temporary directory is left behind after the test.
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	index, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	err = index.Close()
	if err != nil {
		t.Fatal(err)
	}

	// A second New on the same path must fail.
	_, err = New(tmpIndexPath, NewIndexMapping())
	if err != ErrorIndexPathExists {
		t.Fatalf("expected error index path exists, got %v", err)
	}
}
// TestIndexOpenNonExisting checks that Open on the path "doesnotexist"
// reports ErrorIndexPathDoesNotExist.
func TestIndexOpenNonExisting(t *testing.T) {
	if _, openErr := Open("doesnotexist"); openErr != ErrorIndexPathDoesNotExist {
		t.Fatalf("expected error index path does not exist, got %v", openErr)
	}
}
// TestIndexOpenMetaMissingOrCorrupt creates an index, closes it, and then
// tampers with its index_meta.json to check how Open reacts to an unknown
// storage type, to unparsable metadata, and to a missing metadata file.
func TestIndexOpenMetaMissingOrCorrupt(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}

	metaFile := filepath.Join(indexDir, "index_meta.json")

	// now intentionally change the storage type
	if err = os.WriteFile(metaFile, []byte(`{"storage":"mystery"}`), 0o666); err != nil {
		t.Fatal(err)
	}
	if _, err = Open(indexDir); err == nil {
		t.Fatalf("expected error for unknown storage type, got %v", err)
	}

	// now intentionally corrupt the metadata
	if err = os.WriteFile(metaFile, []byte("corrupted"), 0o666); err != nil {
		t.Fatal(err)
	}
	if _, err = Open(indexDir); err != ErrorIndexMetaCorrupt {
		t.Fatalf("expected error index metadata corrupted, got %v", err)
	}

	// now intentionally remove the metadata
	if err = os.Remove(metaFile); err != nil {
		t.Fatal(err)
	}
	if _, err = Open(indexDir); err != ErrorIndexMetaMissing {
		t.Fatalf("expected error index metadata missing, got %v", err)
	}
}
// TestInMemIndex checks that a memory-only index can be created and then
// closed without error.
func TestInMemIndex(t *testing.T) {
	memIdx, err := NewMemOnly(NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	if closeErr := memIdx.Close(); closeErr != nil {
		t.Fatal(closeErr)
	}
}
// TestClosedIndex closes a memory-only index and then checks that Index,
// Delete, Batch, Document, DocCount, Search and Fields each report
// ErrorIndexClosed.
func TestClosedIndex(t *testing.T) {
	idx, err := NewMemOnly(NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}

	// Operations attempted, in this order, against the closed index. Each
	// entry returns only the error of the call it wraps.
	attempts := []func() error{
		func() error { return idx.Index("test", "test") },
		func() error { return idx.Delete("test") },
		func() error { return idx.Batch(idx.NewBatch()) },
		func() error { _, opErr := idx.Document("test"); return opErr },
		func() error { _, opErr := idx.DocCount(); return opErr },
		func() error { _, opErr := idx.Search(NewSearchRequest(NewTermQuery("test"))); return opErr },
		func() error { _, opErr := idx.Fields(); return opErr },
	}
	for _, attempt := range attempts {
		if opErr := attempt(); opErr != ErrorIndexClosed {
			t.Errorf("expected error index closed, got %v", opErr)
		}
	}
}
// slowQuery wraps another query and pauses for a fixed duration before
// building that query's searcher. The tests use it to make a search take a
// predictable minimum amount of time.
type slowQuery struct {
	// actual is the query whose searcher is ultimately returned.
	actual query.Query
	// delay is how long Searcher sleeps before delegating to actual.
	delay time.Duration
}

// Searcher sleeps for the configured delay and then returns whatever the
// wrapped query's Searcher returns for the same arguments. The sleep is a
// plain time.Sleep, so it does not itself observe ctx.
func (sq *slowQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	time.Sleep(sq.delay)
	wrapped := sq.actual
	return wrapped.Searcher(ctx, i, m, options)
}
// TestSlowSearch checks the slow-search logging: with a one minute threshold
// a plain term search must not write to the logger, while a search delayed by
// slowQuery with a one microsecond threshold must.
func TestSlowSearch(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// Config.SlowSearchLogThreshold is package-level state; remember it so the
	// values set below do not leak into tests that run afterwards.
	savedThreshold := Config.SlowSearchLogThreshold
	defer func() {
		Config.SlowSearchLogThreshold = savedThreshold
		// reset logger back to normal
		SetLog(log.New(io.Discard, "bleve", log.LstdFlags))
	}()

	// set custom logger that records whether anything was written to it
	var sdw sawDataWriter
	SetLog(log.New(&sdw, "bleve", log.LstdFlags))

	index, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// With a very high threshold nothing should be logged.
	Config.SlowSearchLogThreshold = 1 * time.Minute
	query := NewTermQuery("water")
	req := NewSearchRequest(query)
	_, err = index.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if sdw.sawData {
		t.Errorf("expected to not see slow query logged, but did")
	}

	// With a tiny threshold and an artificially delayed query, the search
	// must be logged as slow.
	sq := &slowQuery{
		actual: query,
		delay:  50 * time.Millisecond, // on Windows timer resolution is 15ms
	}
	req.Query = sq
	Config.SlowSearchLogThreshold = 1 * time.Microsecond
	_, err = index.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if !sdw.sawData {
		t.Errorf("expected to see slow query logged, but didn't")
	}
}
// sawDataWriter is a writer that discards its input and only remembers
// whether Write has ever been called.
type sawDataWriter struct {
	// sawData becomes true on the first call to Write and is never reset.
	sawData bool
}

// Write marks the writer as having seen data and reports the whole of p as
// written; the bytes themselves are not stored. The returned error is always
// nil.
func (s *sawDataWriter) Write(p []byte) (n int, err error) {
	s.sawData = true
	n = len(p)
	return n, nil
}
// TestStoredFieldPreserved indexes one document with text, boolean and
// numeric values and checks that a search requesting those fields returns
// them with their original values.
func TestStoredFieldPreserved(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	index, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doca := map[string]interface{}{
		"name": "Marty",
		"desc": "GopherCON India",
		"bool": true,
		"num":  float64(1),
	}
	err = index.Index("a", doca)
	if err != nil {
		t.Error(err)
	}

	q := NewTermQuery("marty")
	req := NewSearchRequest(q)
	req.Fields = []string{"name", "desc", "bool", "num"}
	res, err := index.Search(req)
	if err != nil {
		// Fatal rather than Error: res is dereferenced right below and must
		// not be used after a failed search.
		t.Fatal(err)
	}
	if len(res.Hits) != 1 {
		t.Fatalf("expected 1 hit, got %d", len(res.Hits))
	}

	fields := res.Hits[0].Fields
	if fields["name"] != "Marty" {
		t.Errorf("expected 'Marty' got '%s'", fields["name"])
	}
	if fields["desc"] != "GopherCON India" {
		t.Errorf("expected 'GopherCON India' got '%s'", fields["desc"])
	}
	if fields["num"] != float64(1) {
		t.Errorf("expected '1' got '%v'", fields["num"])
	}
	if fields["bool"] != true {
		t.Errorf("expected 'true' got '%v'", fields["bool"])
	}
}
// TestDict exercises the field dictionary accessors of an index: the full
// dictionary of a field (FieldDict), a start/end range (FieldDictRange) and a
// prefix (FieldDictPrefix). It also checks that Stats returns a non-nil value.
func TestDict(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	index, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	doca := map[string]interface{}{
		"name": "marty",
		"desc": "gophercon india",
	}
	err = index.Index("a", doca)
	if err != nil {
		t.Error(err)
	}

	docy := map[string]interface{}{
		"name": "jasper",
		"desc": "clojure",
	}
	err = index.Index("y", docy)
	if err != nil {
		t.Error(err)
	}

	docx := map[string]interface{}{
		"name": "rose",
		"desc": "googler",
	}
	err = index.Index("x", docx)
	if err != nil {
		t.Error(err)
	}

	dict, err := index.FieldDict("name")
	if err != nil {
		// Fatal rather than Error: the dictionary is used right below and
		// must not be touched if it could not be opened.
		t.Fatal(err)
	}

	// drainTerms reads entries from whatever dictionary the dict variable
	// currently holds, stopping at the first error or nil entry, and returns
	// the terms seen in iteration order.
	drainTerms := func() []string {
		terms := []string{}
		for de, err := dict.Next(); err == nil && de != nil; de, err = dict.Next() {
			terms = append(terms, string(de.Term))
		}
		return terms
	}

	terms := drainTerms()
	expectedTerms := []string{"jasper", "marty", "rose"}
	if !reflect.DeepEqual(terms, expectedTerms) {
		t.Errorf("expected %v, got %v", expectedTerms, terms)
	}
	err = dict.Close()
	if err != nil {
		t.Fatal(err)
	}

	// test start and end range
	dict, err = index.FieldDictRange("name", []byte("marty"), []byte("rose"))
	if err != nil {
		t.Fatal(err)
	}
	terms = drainTerms()
	expectedTerms = []string{"marty", "rose"}
	if !reflect.DeepEqual(terms, expectedTerms) {
		t.Errorf("expected %v, got %v", expectedTerms, terms)
	}
	err = dict.Close()
	if err != nil {
		t.Fatal(err)
	}

	// test prefix lookup on a field holding several terms that share a prefix
	docz := map[string]interface{}{
		"name": "prefix",
		"desc": "bob cat cats catting dog doggy zoo",
	}
	err = index.Index("z", docz)
	if err != nil {
		t.Error(err)
	}

	dict, err = index.FieldDictPrefix("desc", []byte("cat"))
	if err != nil {
		t.Fatal(err)
	}
	terms = drainTerms()
	expectedTerms = []string{"cat", "cats", "catting"}
	if !reflect.DeepEqual(terms, expectedTerms) {
		t.Errorf("expected %v, got %v", expectedTerms, terms)
	}

	stats := index.Stats()
	if stats == nil {
		t.Errorf("expected IndexStat, got nil")
	}

	err = dict.Close()
	if err != nil {
		t.Fatal(err)
	}

	err = index.Close()
	if err != nil {
		t.Fatal(err)
	}
}
// TestBatchString builds a batch holding one index, one delete, one
// set-internal and one delete-internal operation and checks that the batch's
// String output starts with the operation counts and mentions each operation.
func TestBatchString(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	b := idx.NewBatch()
	if err = b.Index("a", []byte("{}")); err != nil {
		t.Fatal(err)
	}
	b.Delete("b")
	b.SetInternal([]byte("c"), []byte{})
	b.DeleteInternal([]byte("d"))

	rendered := b.String()
	if !strings.HasPrefix(rendered, "Batch (2 ops, 2 internal ops)") {
		t.Errorf("expected to start with Batch (2 ops, 2 internal ops), did not")
	}
	if !strings.Contains(rendered, "INDEX - 'a'") {
		t.Errorf("expected to contain INDEX - 'a', did not")
	}
	if !strings.Contains(rendered, "DELETE - 'b'") {
		t.Errorf("expected to contain DELETE - 'b', did not")
	}
	if !strings.Contains(rendered, "SET INTERNAL - 'c'") {
		t.Errorf("expected to contain SET INTERNAL - 'c', did not")
	}
	if !strings.Contains(rendered, "DELETE INTERNAL - 'd'") {
		t.Errorf("expected to contain DELETE INTERNAL - 'd', did not")
	}
}
// TestIndexMetadataRaceBug198 repeatedly calls DocCount from a background
// goroutine while the test goroutine applies 100 single-document batches to
// the same index.
func TestIndexMetadataRaceBug198(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	var wg sync.WaitGroup
	wg.Add(1)
	stop := make(chan struct{})
	go func() {
		defer wg.Done()
		for {
			// Leave as soon as the writer side signals completion; otherwise
			// fall through and poll the document count again.
			select {
			case <-stop:
				return
			default:
			}
			if _, countErr := idx.DocCount(); countErr != nil {
				t.Error(countErr)
				return
			}
		}
	}()

	for round := 0; round < 100; round++ {
		b := idx.NewBatch()
		if err = b.Index("a", []byte("{}")); err != nil {
			t.Fatal(err)
		}
		if err = idx.Batch(b); err != nil {
			t.Fatal(err)
		}
	}

	close(stop)
	wg.Wait()
}
// TestSortMatchSearch indexes 200 documents whose Name, Day and Number values
// cycle through fixed lists, runs a match query for "One" sorted by Day then
// Name, and checks that the returned hits are in non-decreasing Day order.
func TestSortMatchSearch(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	names := []string{"Noam", "Uri", "David", "Yosef", "Eitan", "Itay", "Ariel", "Daniel", "Omer", "Yogev", "Yehonatan", "Moshe", "Mohammed", "Yusuf", "Omar"}
	days := []string{"Sunday", "Monday", "Tuesday", "Wednesday", "Thursday", "Friday", "Saturday"}
	numbers := []string{"One", "Two", "Three", "Four", "Five", "Six", "Seven", "Eight", "Nine", "Ten", "Eleven", "Twelve"}

	batch := idx.NewBatch()
	for n := 0; n < 200; n++ {
		record := map[string]interface{}{
			"Name":   names[n%len(names)],
			"Day":    days[n%len(days)],
			"Number": numbers[n%len(numbers)],
		}
		if err = batch.Index(fmt.Sprintf("%d", n), record); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	req := NewSearchRequest(NewMatchQuery("One"))
	req.SortBy([]string{"Day", "Name"})
	req.Fields = []string{"*"}
	sr, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// Walk the hits and make sure no Day value sorts before its predecessor.
	previous := ""
	for _, hit := range sr.Hits {
		current := hit.Fields["Day"].(string)
		if current < previous {
			t.Errorf("Hits must be sorted by 'Day'. Found '%s' before '%s'", previous, current)
		}
		previous = current
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestIndexCountMatchSearch indexes 2000 identical-bodied documents from ten
// concurrent goroutines (200 per batch) and checks that both DocCount and the
// total of a match search covering every document equal 2000.
func TestIndexCountMatchSearch(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	var wg sync.WaitGroup
	for worker := 0; worker < 10; worker++ {
		wg.Add(1)
		go func(worker int) {
			defer wg.Done()

			batch := idx.NewBatch()
			for j := 0; j < 200; j++ {
				// Document IDs are globally unique across workers.
				docID := fmt.Sprintf("%d", (worker*200)+j)
				doc := struct {
					Body string
				}{
					Body: "match",
				}
				if indexErr := batch.Index(docID, doc); indexErr != nil {
					t.Error(indexErr)
					return
				}
			}
			if batchErr := idx.Batch(batch); batchErr != nil {
				t.Error(batchErr)
			}
		}(worker)
	}
	wg.Wait()

	// search for something that should match all documents
	sr, err := idx.Search(NewSearchRequest(NewMatchQuery("match")))
	if err != nil {
		t.Fatal(err)
	}

	// get the index document count
	dc, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}

	// make sure test is working correctly, doc count should be 2000
	if dc != 2000 {
		t.Errorf("expected doc count 2000, got %d", dc)
	}

	// make sure our search found all the documents
	if dc != sr.Total {
		t.Errorf("expected search result total %d to match doc count %d", sr.Total, dc)
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestBatchReset fills a batch with four operations (index, delete,
// set-internal, delete-internal), checks that Size reports 4, and checks
// that Size reports 0 after Reset.
func TestBatchReset(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	b := idx.NewBatch()
	payload := struct {
		Body string
	}{
		Body: "v1",
	}
	if err = b.Index("k1", payload); err != nil {
		t.Error(err)
	}
	b.Delete("k2")
	b.SetInternal([]byte("k3"), []byte("v3"))
	b.DeleteInternal([]byte("k4"))

	if size := b.Size(); size != 4 {
		t.Logf("%v", b)
		t.Errorf("expected batch size 4, got %d", b.Size())
	}

	b.Reset()

	if b.Size() != 0 {
		t.Errorf("expected batch size 0 after reset, got %d", b.Size())
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestDocumentFieldArrayPositions indexes a document whose Messages field is
// an array of four strings and checks that each stored value reports its
// array index as its first array position. It then indexes a document whose
// Messages field is a single string and checks that the stored value reports
// no array positions.
func TestDocumentFieldArrayPositions(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	// index a document with an array of strings
	arrayDoc := struct {
		Messages []string
	}{
		Messages: []string{
			"first",
			"second",
			"third",
			"last",
		},
	}
	if err = idx.Index("k", arrayDoc); err != nil {
		t.Fatal(err)
	}

	// load the document
	stored, err := idx.Document("k")
	if err != nil {
		t.Fatal(err)
	}

	stored.VisitFields(func(f index.Field) {
		// Only fields holding one of the four known values are inspected.
		switch string(f.Value()) {
		case "first":
			ap := f.ArrayPositions()
			if len(ap) < 1 {
				t.Errorf("expected an array position, got none")
				return
			}
			if ap[0] != 0 {
				t.Errorf("expected 'first' in array position 0, got %d", ap[0])
			}
		case "second":
			ap := f.ArrayPositions()
			if len(ap) < 1 {
				t.Errorf("expected an array position, got none")
				return
			}
			if ap[0] != 1 {
				t.Errorf("expected 'second' in array position 1, got %d", ap[0])
			}
		case "third":
			ap := f.ArrayPositions()
			if len(ap) < 1 {
				t.Errorf("expected an array position, got none")
				return
			}
			if ap[0] != 2 {
				t.Errorf("expected 'third' in array position 2, got %d", ap[0])
			}
		case "last":
			ap := f.ArrayPositions()
			if len(ap) < 1 {
				t.Errorf("expected an array position, got none")
				return
			}
			if ap[0] != 3 {
				t.Errorf("expected 'last' in array position 3, got %d", ap[0])
			}
		}
	})

	// now index a document in the same field with a single string
	scalarDoc := struct {
		Messages string
	}{
		Messages: "only",
	}
	if err = idx.Index("k2", scalarDoc); err != nil {
		t.Fatal(err)
	}

	// load the document
	single, err := idx.Document("k2")
	if err != nil {
		t.Fatal(err)
	}

	single.VisitFields(func(f index.Field) {
		if string(f.Value()) != "only" {
			return
		}
		if ap := f.ArrayPositions(); len(ap) != 0 {
			t.Errorf("expected no array positions, got %d", len(ap))
		}
	})

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestKeywordSearchBug207 maps the Body field with the keyword analyzer,
// indexes two documents whose bodies differ only by a leading "a", and checks
// that term queries and query-string queries for each exact body return
// exactly the matching document.
func TestKeywordSearchBug207(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	f := NewTextFieldMapping()
	f.Analyzer = keyword.Name

	m := NewIndexMapping()
	m.DefaultMapping = NewDocumentMapping()
	m.DefaultMapping.AddFieldMappingsAt("Body", f)

	index, err := New(tmpIndexPath, m)
	if err != nil {
		t.Fatal(err)
	}
	// Close via defer so the index is released even when the test bails out
	// early through t.Fatal.
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doc1 := struct {
		Body string
	}{
		Body: "a555c3bb06f7a127cda000005",
	}
	err = index.Index("a", doc1)
	if err != nil {
		t.Fatal(err)
	}

	doc2 := struct {
		Body string
	}{
		Body: "555c3bb06f7a127cda000005",
	}
	err = index.Index("b", doc2)
	if err != nil {
		t.Fatal(err)
	}

	// expectOnlyHit runs sreq and checks that it yields one result whose
	// first hit has the ID wantID. It stops the test when there are no hits
	// at all, instead of indexing into an empty slice.
	expectOnlyHit := func(sreq *SearchRequest, wantID string) {
		sres, err := index.Search(sreq)
		if err != nil {
			t.Fatal(err)
		}
		if sres.Total != 1 {
			t.Errorf("expected 1 result, got %d", sres.Total)
		}
		if len(sres.Hits) == 0 {
			t.Fatalf("expected a hit with id '%s', got no hits", wantID)
		}
		if sres.Hits[0].ID != wantID {
			t.Errorf("expected id '%s', got '%s'", wantID, sres.Hits[0].ID)
		}
	}

	// now search for these terms
	expectOnlyHit(NewSearchRequest(NewTermQuery("a555c3bb06f7a127cda000005")), "a")
	expectOnlyHit(NewSearchRequest(NewTermQuery("555c3bb06f7a127cda000005")), "b")

	// now do the same searches using query strings
	expectOnlyHit(NewSearchRequest(NewQueryStringQuery("Body:a555c3bb06f7a127cda000005")), "a")
	expectOnlyHit(NewSearchRequest(NewQueryStringQuery(`Body:555c3bb06f7a127cda000005`)), "b")
}
// TestTermVectorArrayPositions indexes a document whose Messages field is an
// array of strings and checks that search hit locations carry the array
// position of the matched element, both for a query without an explicit
// field and for one restricted to the Messages field.
func TestTermVectorArrayPositions(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	// index a document with an array of strings
	arrayDoc := struct {
		Messages []string
	}{
		Messages: []string{
			"first",
			"second",
			"third",
			"last",
		},
	}
	if err = idx.Index("k", arrayDoc); err != nil {
		t.Fatal(err)
	}

	// search for this document in all field
	anyFieldReq := NewSearchRequest(NewTermQuery("second"))
	anyFieldReq.IncludeLocations = true
	results, err := idx.Search(anyFieldReq)
	if err != nil {
		t.Fatal(err)
	}
	if results.Total != 1 {
		t.Fatalf("expected 1 result, got %d", results.Total)
	}
	secondLocs := results.Hits[0].Locations["Messages"]["second"]
	if len(secondLocs) < 1 {
		t.Fatalf("expected at least one location")
	}
	if len(secondLocs[0].ArrayPositions) < 1 {
		t.Fatalf("expected at least one location array position")
	}
	if secondLocs[0].ArrayPositions[0] != 1 {
		t.Fatalf("expected array position 1, got %d", secondLocs[0].ArrayPositions[0])
	}

	// repeat search for this document in Messages field
	fieldQuery := NewTermQuery("third")
	fieldQuery.SetField("Messages")
	fieldReq := NewSearchRequest(fieldQuery)
	fieldReq.IncludeLocations = true
	results, err = idx.Search(fieldReq)
	if err != nil {
		t.Fatal(err)
	}
	if results.Total != 1 {
		t.Fatalf("expected 1 result, got %d", results.Total)
	}
	thirdLocs := results.Hits[0].Locations["Messages"]["third"]
	if len(thirdLocs) < 1 {
		t.Fatalf("expected at least one location")
	}
	if len(thirdLocs[0].ArrayPositions) < 1 {
		t.Fatalf("expected at least one location array position")
	}
	if thirdLocs[0].ArrayPositions[0] != 2 {
		t.Fatalf("expected array position 2, got %d", thirdLocs[0].ArrayPositions[0])
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestDocumentStaticMapping uses a static default document mapping that
// declares only the Text, Date and Numeric fields, indexes a document that
// also carries "Ignored*" fields, and checks that the sorted list of index
// fields starts with Date, Numeric, Text and _all.
func TestDocumentStaticMapping(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	im := NewIndexMapping()
	im.DefaultMapping = NewDocumentStaticMapping()
	im.DefaultMapping.AddFieldMappingsAt("Text", NewTextFieldMapping())
	im.DefaultMapping.AddFieldMappingsAt("Date", NewDateTimeFieldMapping())
	im.DefaultMapping.AddFieldMappingsAt("Numeric", NewNumericFieldMapping())

	idx, err := New(indexDir, im)
	if err != nil {
		t.Fatal(err)
	}

	sample := struct {
		Text           string
		IgnoredText    string
		Numeric        float64
		IgnoredNumeric float64
		Date           time.Time
		IgnoredDate    time.Time
	}{
		Text:           "valid text",
		IgnoredText:    "ignored text",
		Numeric:        10,
		IgnoredNumeric: 20,
		Date:           time.Unix(1, 0),
		IgnoredDate:    time.Unix(2, 0),
	}
	if err = idx.Index("a", sample); err != nil {
		t.Fatal(err)
	}

	got, err := idx.Fields()
	if err != nil {
		t.Fatal(err)
	}
	sort.Strings(got)

	want := []string{"Date", "Numeric", "Text", "_all"}
	if len(got) < len(want) {
		t.Fatalf("invalid field count: %d", len(got))
	}
	// Only the leading len(want) entries of the sorted list are compared.
	for pos := range want {
		if want[pos] != got[pos] {
			t.Fatalf("unexpected field[%d]: %s", pos, got[pos])
		}
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestIndexEmptyDocId checks how an empty document ID is handled: Index and
// Delete on the index, and Index on a batch, must report ErrorEmptyID, and
// Delete on a batch must leave the batch empty.
func TestIndexEmptyDocId(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	body := map[string]interface{}{
		"body": "nodocid",
	}

	if err = idx.Index("", body); err != ErrorEmptyID {
		t.Errorf("expect index empty doc id to fail")
	}

	if err = idx.Delete(""); err != ErrorEmptyID {
		t.Errorf("expect delete empty doc id to fail")
	}

	b := idx.NewBatch()
	if err = b.Index("", body); err != ErrorEmptyID {
		t.Errorf("expect index empty doc id in batch to fail")
	}

	b.Delete("")
	if b.Size() > 0 {
		t.Errorf("expect delete empty doc id in batch to be ignored")
	}
}
// TestDateTimeFieldMappingIssue287 indexes three documents dated three, two
// and one hours before now under a date-time field mapping, then runs date
// range queries that should match all of them, only the oldest, and only the
// newest.
func TestDateTimeFieldMappingIssue287(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	dateMapping := NewDateTimeFieldMapping()

	im := NewIndexMapping()
	im.DefaultMapping = NewDocumentMapping()
	im.DefaultMapping.AddFieldMappingsAt("Date", dateMapping)

	idx, err := New(indexDir, im)
	if err != nil {
		t.Fatal(err)
	}

	type doc struct {
		Date time.Time
	}

	now := time.Now()

	// 3hr ago to 1hr ago
	for i := 0; i < 3; i++ {
		offset := time.Duration(i-3) * time.Hour
		if err = idx.Index(strconv.Itoa(i), doc{now.Add(offset)}); err != nil {
			t.Fatal(err)
		}
	}

	// search range across all docs
	sres, err := idx.Search(NewSearchRequest(NewDateRangeQuery(now.Add(-4*time.Hour), now)))
	if err != nil {
		t.Fatal(err)
	}
	if sres.Total != 3 {
		t.Errorf("expected 3 results, got %d", sres.Total)
	}

	// search range includes only oldest
	oldestFrom := now.Add(-4 * time.Hour)
	oldestTo := now.Add(-121 * time.Minute)
	sres, err = idx.Search(NewSearchRequest(NewDateRangeQuery(oldestFrom, oldestTo)))
	if err != nil {
		t.Fatal(err)
	}
	if sres.Total != 1 {
		t.Errorf("expected 1 results, got %d", sres.Total)
	}
	if sres.Total > 0 && sres.Hits[0].ID != "0" {
		t.Errorf("expecated id '0', got '%s'", sres.Hits[0].ID)
	}

	// search range includes only newest
	newestFrom := now.Add(-61 * time.Minute)
	sres, err = idx.Search(NewSearchRequest(NewDateRangeQuery(newestFrom, now)))
	if err != nil {
		t.Fatal(err)
	}
	if sres.Total != 1 {
		t.Errorf("expected 1 results, got %d", sres.Total)
	}
	if sres.Total > 0 && sres.Hits[0].ID != "2" {
		t.Errorf("expecated id '2', got '%s'", sres.Hits[0].ID)
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestDocumentFieldArrayPositionsBug295 indexes a document where the term
// "bleve" occurs twice in the Messages array and once in the MoreData array,
// and checks that the Messages locations reported by a search (with and
// without an explicit field) carry array positions 0 and 1.
func TestDocumentFieldArrayPositionsBug295(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	// index a document with an array of strings
	sample := struct {
		Messages []string
		Another  string
		MoreData []string
	}{
		Messages: []string{
			"bleve",
			"bleve",
		},
		Another: "text",
		MoreData: []string{
			"a",
			"b",
			"c",
			"bleve",
		},
	}
	if err = idx.Index("k", sample); err != nil {
		t.Fatal(err)
	}

	// search for it in the messages field
	fieldQuery := NewTermQuery("bleve")
	fieldQuery.SetField("Messages")
	fieldReq := NewSearchRequest(fieldQuery)
	fieldReq.IncludeLocations = true
	results, err := idx.Search(fieldReq)
	if err != nil {
		t.Fatal(err)
	}
	if results.Total != 1 {
		t.Fatalf("expected 1 result, got %d", results.Total)
	}
	locs := results.Hits[0].Locations["Messages"]["bleve"]
	if len(locs) != 2 {
		t.Fatalf("expected 2 locations of 'bleve', got %d", len(locs))
	}
	if locs[0].ArrayPositions[0] != 0 {
		t.Errorf("expected array position to be 0")
	}
	if locs[1].ArrayPositions[0] != 1 {
		t.Errorf("expected array position to be 1")
	}

	// search for it in all
	allReq := NewSearchRequest(NewTermQuery("bleve"))
	allReq.IncludeLocations = true
	results, err = idx.Search(allReq)
	if err != nil {
		t.Fatal(err)
	}
	if results.Total != 1 {
		t.Fatalf("expected 1 result, got %d", results.Total)
	}
	locs = results.Hits[0].Locations["Messages"]["bleve"]
	if len(locs) != 2 {
		t.Fatalf("expected 2 locations of 'bleve', got %d", len(locs))
	}
	if locs[0].ArrayPositions[0] != 0 {
		t.Errorf("expected array position to be 0")
	}
	if locs[1].ArrayPositions[0] != 1 {
		t.Errorf("expected array position to be 1")
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestBooleanFieldMappingIssue109 maps a Bool field as boolean, indexes one
// document with true and one with false, and checks that bool field queries
// for true and false on that field, and for true without an explicit field,
// each match exactly one document.
func TestBooleanFieldMappingIssue109(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	im := NewIndexMapping()
	im.DefaultMapping = NewDocumentMapping()
	im.DefaultMapping.AddFieldMappingsAt("Bool", NewBooleanFieldMapping())

	idx, err := New(indexDir, im)
	if err != nil {
		t.Fatal(err)
	}

	type doc struct {
		Bool bool
	}
	if err = idx.Index("true", &doc{Bool: true}); err != nil {
		t.Fatal(err)
	}
	if err = idx.Index("false", &doc{Bool: false}); err != nil {
		t.Fatal(err)
	}

	// true, restricted to the Bool field
	trueQuery := NewBoolFieldQuery(true)
	trueQuery.SetField("Bool")
	sres, err := idx.Search(NewSearchRequest(trueQuery))
	if err != nil {
		t.Fatal(err)
	}
	if sres.Total != 1 {
		t.Errorf("expected 1 results, got %d", sres.Total)
	}

	// false, restricted to the Bool field
	falseQuery := NewBoolFieldQuery(false)
	falseQuery.SetField("Bool")
	sres, err = idx.Search(NewSearchRequest(falseQuery))
	if err != nil {
		t.Fatal(err)
	}
	if sres.Total != 1 {
		t.Errorf("expected 1 results, got %d", sres.Total)
	}

	// true, without setting a field on the query
	sres, err = idx.Search(NewSearchRequest(NewBoolFieldQuery(true)))
	if err != nil {
		t.Fatal(err)
	}
	if sres.Total != 1 {
		t.Errorf("expected 1 results, got %d", sres.Total)
	}

	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestSearchTimeout checks SearchInContext against three contexts: one with
// a generous timeout (the search must succeed), one with a one microsecond
// timeout combined with a delayed query (context.DeadlineExceeded expected),
// and one that is cancelled before the search starts (context.Canceled
// expected).
func TestSearchTimeout(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	termQuery := NewTermQuery("water")
	req := NewSearchRequest(termQuery)

	// first run a search with an absurdly long timeout (should succeed)
	generousCtx, releaseGenerous := context.WithTimeout(context.Background(), 10*time.Second)
	defer releaseGenerous()
	if _, err = idx.SearchInContext(generousCtx, req); err != nil {
		t.Fatal(err)
	}

	// now run a search again with an absurdly low timeout (should timeout)
	tinyCtx, releaseTiny := context.WithTimeout(context.Background(), 1*time.Microsecond)
	defer releaseTiny()
	req.Query = &slowQuery{
		actual: termQuery,
		delay:  50 * time.Millisecond, // on Windows timer resolution is 15ms
	}
	if _, err = idx.SearchInContext(tinyCtx, req); err != context.DeadlineExceeded {
		t.Fatalf("expected %v, got: %v", context.DeadlineExceeded, err)
	}

	// now run a search with a long timeout, but with a long query, and cancel it
	abortedCtx, abort := context.WithTimeout(context.Background(), 10*time.Second)
	req = NewSearchRequest(&slowQuery{
		actual: termQuery,
		delay:  100 * time.Millisecond, // on Windows timer resolution is 15ms
	})
	abort()
	if _, err = idx.SearchInContext(abortedCtx, req); err != context.Canceled {
		t.Fatalf("expected %v, got: %v", context.Canceled, err)
	}
}
// TestConfigCache exposes a concurrent map write with go 1.6.
//
// It looks up the default highlighter in the shared Config.Cache from 100
// goroutines at once. The test waits for all of them before returning, so
// every lookup actually runs within the test and no goroutine can call
// t.Error after the test has already completed.
func TestConfigCache(t *testing.T) {
	var wg sync.WaitGroup
	for i := 0; i < 100; i++ {
		wg.Add(1)
		go func() {
			defer wg.Done()
			_, err := Config.Cache.HighlighterNamed(Config.DefaultHighlighter)
			if err != nil {
				t.Error(err)
			}
		}()
	}
	wg.Wait()
}
// TestBatchRaceBug260 reuses a single batch across two index operations:
// it indexes one document, applies and resets the batch, then does the same
// with a second document.
func TestBatchRaceBug260(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	idx, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	batch := idx.NewBatch()

	// first use of the batch
	if err = batch.Index("1", 1); err != nil {
		t.Fatal(err)
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}
	batch.Reset()

	// second use of the same batch after the reset
	if err = batch.Index("2", 2); err != nil {
		t.Fatal(err)
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}
	batch.Reset()
}
// BenchmarkBatchOverhead measures building and applying a batch of 1000
// small documents per iteration. The index is created with the default index
// type and the null KV store name.
func BenchmarkBatchOverhead(b *testing.B) {
	tmpIndexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, tmpIndexPath)

	m := NewIndexMapping()
	idx, err := NewUsing(tmpIndexPath, m, Config.DefaultIndexType, null.Name, nil)
	if err != nil {
		b.Fatal(err)
	}
	// Release the index before the temporary directory is removed.
	defer func() {
		if err := idx.Close(); err != nil {
			b.Fatal(err)
		}
	}()

	// Exclude index creation from the measurement.
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		// put 1000 items in a batch
		batch := idx.NewBatch()
		// The document counter has its own name so it no longer shadows the
		// index variable inside the loop.
		for docNum := 0; docNum < 1000; docNum++ {
			err = batch.Index(fmt.Sprintf("%d", docNum), map[string]interface{}{"name": "bleve"})
			if err != nil {
				b.Fatal(err)
			}
		}
		err = idx.Batch(batch)
		if err != nil {
			b.Fatal(err)
		}
		batch.Reset()
	}
	// Keep the deferred close and cleanup out of the measured time.
	b.StopTimer()
}
// TestOpenReadonlyMultiple builds an index on disk, closes it, and checks
// that it can then be opened twice at the same time with the "read_only"
// option set, and that both handles close without error.
func TestOpenReadonlyMultiple(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	// build an index and close it
	writer, err := New(indexDir, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	seed := map[string]interface{}{
		"name": "marty",
		"desc": "gophercon india",
	}
	if err = writer.Index("a", seed); err != nil {
		t.Fatal(err)
	}
	if err = writer.Close(); err != nil {
		t.Fatal(err)
	}

	// now open it read-only
	firstReader, err := OpenUsing(indexDir, map[string]interface{}{
		"read_only": true,
	})
	if err != nil {
		t.Fatal(err)
	}

	// now open it again
	secondReader, err := OpenUsing(indexDir, map[string]interface{}{
		"read_only": true,
	})
	if err != nil {
		t.Fatal(err)
	}

	if err = firstReader.Close(); err != nil {
		t.Fatal(err)
	}
	if err = secondReader.Close(); err != nil {
		t.Fatal(err)
	}
}
// TestBug408 tests for VERY large values of size, even though actual result
// set may be reasonable size.
//
// Ten documents are indexed into a memory-only index, half of them with a
// matching user_id. A term search with a size of math.MaxInt32 must return
// exactly the matching half.
func TestBug408(t *testing.T) {
	type TestStruct struct {
		ID     string  `json:"id"`
		UserID *string `json:"user_id"`
	}

	docMapping := NewDocumentMapping()
	docMapping.AddFieldMappingsAt("id", NewTextFieldMapping())
	docMapping.AddFieldMappingsAt("user_id", NewTextFieldMapping())

	indexMapping := NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := NewMemOnly(indexMapping)
	if err != nil {
		t.Fatal(err)
	}

	numToTest := 10
	matchUserID := "match"
	noMatchUserID := "no_match"

	// IDs of the documents that carry the matching user ID.
	matchingDocIds := make(map[string]struct{})
	for n := 0; n < numToTest; n++ {
		ds := &TestStruct{ID: "id_" + strconv.Itoa(n)}
		// Odd-numbered documents match, even-numbered ones do not.
		if n%2 != 0 {
			ds.UserID = &matchUserID
			matchingDocIds[ds.ID] = struct{}{}
		} else {
			ds.UserID = &noMatchUserID
		}

		if err = idx.Index(ds.ID, ds); err != nil {
			t.Fatal(err)
		}
	}

	cnt, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if int(cnt) != numToTest {
		t.Fatalf("expected %d documents in index, got %d", numToTest, cnt)
	}

	userQuery := NewTermQuery(matchUserID)
	userQuery.SetField("user_id")
	results, err := idx.Search(NewSearchRequestOptions(userQuery, math.MaxInt32, 0, false))
	if err != nil {
		t.Fatal(err)
	}
	if int(results.Total) != numToTest/2 {
		t.Fatalf("expected %d search hits, got %d", numToTest/2, results.Total)
	}

	for _, hit := range results.Hits {
		if _, known := matchingDocIds[hit.ID]; !known {
			t.Fatalf("document with ID %s not in results as expected", hit.ID)
		}
	}
}
// TestIndexAdvancedCountMatchSearch indexes 2000 documents from 10 concurrent
// goroutines using IndexAdvanced and verifies that a match query finds as many
// documents as the index reports.
func TestIndexAdvancedCountMatchSearch(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	index, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	const (
		workers       = 10
		docsPerWorker = 200
	)

	// indexChunk builds one batch holding the documents that belong to the
	// given worker and executes it, returning the first error encountered
	indexChunk := func(worker int) error {
		batch := index.NewBatch()
		for j := 0; j < docsPerWorker; j++ {
			id := fmt.Sprintf("%d", worker*docsPerWorker+j)
			doc := document.NewDocument(id)
			doc.AddField(document.NewTextField("body", []uint64{}, []byte("match")))
			doc.AddField(document.NewCompositeField("_all", true, []string{}, []string{}))
			if addErr := batch.IndexAdvanced(doc); addErr != nil {
				return addErr
			}
		}
		return index.Batch(batch)
	}

	var wg sync.WaitGroup
	// buffered so that every worker can report a failure without blocking
	errChan := make(chan error, workers)
	for w := 0; w < workers; w++ {
		wg.Add(1)
		go func(worker int) {
			defer wg.Done()
			if workErr := indexChunk(worker); workErr != nil {
				errChan <- workErr
			}
		}(w)
	}
	wg.Wait()
	close(errChan)
	for workErr := range errChan {
		if workErr != nil {
			t.Fatal(workErr)
		}
	}

	// search for something that should match all documents
	sr, err := index.Search(NewSearchRequest(NewMatchQuery("match")))
	if err != nil {
		t.Fatal(err)
	}

	// get the index document count
	dc, err := index.DocCount()
	if err != nil {
		t.Fatal(err)
	}

	// make sure test is working correctly, doc count should be 2000
	if dc != 2000 {
		t.Errorf("expected doc count 2000, got %d", dc)
	}
	// make sure our search found all the documents
	if sr.Total != dc {
		t.Errorf("expected search result total %d to match doc count %d", sr.Total, dc)
	}

	if err = index.Close(); err != nil {
		t.Fatal(err)
	}
}
// benchmarkSearchOverhead populates an index of the given type with 10000
// single-field documents and then times a two-term disjunction search.
func benchmarkSearchOverhead(indexType string, b *testing.B) {
	tmpIndexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, tmpIndexPath)

	index, err := NewUsing(tmpIndexPath, NewIndexMapping(), indexType, Config.DefaultKVStore, nil)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	// cycle through the element names so each one appears in a quarter of the docs
	elements := []string{"air", "water", "fire", "earth"}
	for docNum := 0; docNum < 10000; docNum++ {
		doc := map[string]interface{}{"name": elements[docNum%len(elements)]}
		if err = index.Index(fmt.Sprintf("%d", docNum), doc); err != nil {
			b.Fatal(err)
		}
	}

	req := NewSearchRequest(NewDisjunctionQuery(NewTermQuery("water"), NewTermQuery("fire")))

	// only the searches below are measured
	b.ResetTimer()
	for n := 0; n < b.N; n++ {
		if _, err = index.Search(req); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkUpsidedownSearchOverhead runs benchmarkSearchOverhead with the
// upsidedown index type.
func BenchmarkUpsidedownSearchOverhead(b *testing.B) {
	benchmarkSearchOverhead(upsidedown.Name, b)
}
// BenchmarkScorchSearchOverhead runs benchmarkSearchOverhead with the
// scorch index type.
func BenchmarkScorchSearchOverhead(b *testing.B) {
	benchmarkSearchOverhead(scorch.Name, b)
}
// TestSearchQueryCallback installs a search-start callback through the
// context and checks that the error it returns is surfaced by SearchInContext.
func TestSearchQueryCallback(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	index, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	expErr := fmt.Errorf("MEM_LIMIT_EXCEEDED")

	// the intended usage of this callback is to see the estimated
	// memory usage before executing, and possibly abort early;
	// here we simulate returning such an error unconditionally
	abortEarly := SearchQueryStartCallbackFn(func(size uint64) error {
		return expErr
	})
	ctx := context.WithValue(context.Background(), SearchQueryStartCallbackKey, abortEarly)

	req := NewSearchRequest(NewTermQuery("water"))
	if _, err = index.SearchInContext(ctx, req); err != expErr {
		t.Fatalf("Expected: %v, Got: %v", expErr, err)
	}
}
// TestBatchMerge merges three batches into one, executes the merged batch,
// reopens the index and verifies documents, internal values and fields.
func TestBatchMerge(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}

	// batch A: indexes doc "a" and sets internal key batchkA
	batchA := idx.NewBatch()
	if err = batchA.Index("a", map[string]interface{}{
		"name":   "scorch",
		"desc":   "gophercon india",
		"nation": "india",
	}); err != nil {
		t.Error(err)
	}
	batchA.SetInternal([]byte("batchkA"), []byte("batchvA"))

	// batch B: indexes doc "b" and sets internal key batchkB
	batchB := idx.NewBatch()
	if err = batchB.Index("b", map[string]interface{}{
		"name": "moss",
		"desc": "gophercon MV",
	}); err != nil {
		t.Error(err)
	}
	batchB.SetInternal([]byte("batchkB"), []byte("batchvB"))

	// batch C: indexes doc "c", overrides batchkB, and deletes both
	// doc "a" and internal key batchkA
	batchC := idx.NewBatch()
	if err = batchC.Index("c", map[string]interface{}{
		"name":    "blahblah",
		"desc":    "inProgress",
		"country": "usa",
	}); err != nil {
		t.Error(err)
	}
	batchC.SetInternal([]byte("batchkC"), []byte("batchvC"))
	batchC.SetInternal([]byte("batchkB"), []byte("batchvBNew"))
	batchC.Delete("a")
	batchC.DeleteInternal([]byte("batchkA"))

	batchA.Merge(batchB)
	if got := batchA.Size(); got != 4 {
		t.Errorf("expected batch size 4, got %d", got)
	}
	batchA.Merge(batchC)
	if got := batchA.Size(); got != 6 {
		t.Errorf("expected batch size 6, got %d", got)
	}

	if err = idx.Batch(batchA); err != nil {
		t.Fatal(err)
	}

	// close the index, open it again, and try some more things
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}
	idx, err = Open(tmpIndexPath)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	count, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if count != 2 {
		t.Errorf("expected doc count 2, got %d", count)
	}

	doc, err := idx.Document("c")
	if err != nil {
		t.Fatal(err)
	}

	// batchkB must carry the value written last (by batch C)
	val, err := idx.GetInternal([]byte("batchkB"))
	if err != nil {
		t.Fatal(err)
	}
	if string(val) != "batchvBNew" {
		t.Errorf("expected val: batchvBNew , got %s", val)
	}

	// batchkA was deleted by batch C
	val, err = idx.GetInternal([]byte("batchkA"))
	if err != nil {
		t.Fatal(err)
	}
	if val != nil {
		t.Errorf("expected nil, got %s", val)
	}

	sawName := false
	doc.VisitFields(func(field index.Field) {
		if field.Name() == "name" && string(field.Value()) == "blahblah" {
			sawName = true
		}
	})
	if !sawName {
		t.Errorf("expected to find field named 'name' with value 'blahblah'")
	}

	fields, err := idx.Fields()
	if err != nil {
		t.Fatal(err)
	}
	wantFields := []string{"_all", "name", "desc", "country"}
	if len(fields) < len(wantFields) {
		t.Fatalf("expected %d fields got %d", len(wantFields), len(fields))
	}
	present := make(map[string]bool, len(fields))
	for _, f := range fields {
		present[f] = true
	}
	for _, want := range wantFields {
		if !present[want] {
			t.Errorf("field %s is missing", want)
		}
	}
}
// TestBug1096 repeatedly executes and resets a single reused batch against a
// scorch index, re-indexing the same document ids each time, and then checks
// that a wildcard query does not return more hits than expected.
func TestBug1096(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// use default mapping
	mapping := NewIndexMapping()

	// create a scorch index with default SAFE batches
	//
	// err is declared locally so the test does not depend on, or write to,
	// state outside this function. Failures are reported with t.Fatal so that
	// the deferred cleanup still runs and only this test is aborted.
	idx, err := NewUsing(tmpIndexPath, mapping, "scorch", "scorch", nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// create a single batch instance that we will reuse
	// this should be safe because we have single goroutine
	// and we always wait for batch execution to finish
	batch := idx.NewBatch()

	// number of batches to execute
	for i := 0; i < 10; i++ {
		// number of documents to put into the batch
		for j := 0; j < 91; j++ {
			// create a doc id 0-90 (important so that we get id's 9 and 90)
			// this could duplicate something already in the index
			// this too should be OK and update the item in the index
			id := fmt.Sprintf("%d", j)
			err = batch.Index(id, map[string]interface{}{
				"name":  id,
				"batch": fmt.Sprintf("%d", i),
			})
			if err != nil {
				t.Fatal(err)
			}
		}

		// execute the batch
		err = idx.Batch(batch)
		if err != nil {
			t.Fatal(err)
		}

		// reset the batch before reusing it
		batch.Reset()
	}

	// search for docs having name starting with the number 9
	q := NewWildcardQuery("9*")
	q.SetField("name")
	req := NewSearchRequestOptions(q, 1000, 0, false)
	req.Fields = []string{"*"}
	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// we expect only 2 hits, for docs 9 and 90
	if res.Total > 2 {
		t.Fatalf("expected only 2 hits '9' and '90', got %v", res)
	}
}
// TestDataRaceBug1092 executes and resets the same, never populated, batch
// ten times against an upsidedown index backed by boltdb.
//
// NOTE(review): err is not declared in this function, so it has to resolve to
// a variable declared elsewhere in the package; also log.Fatal exits the
// process without running the deferred cleanup. Both are left as they are
// here - confirm whether they should become a local err and t.Fatal.
func TestDataRaceBug1092(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	var idx Index
	// the default mapping is sufficient, no documents are ever indexed
	idx, err = NewUsing(tmpIndexPath, NewIndexMapping(), upsidedown.Name, boltdb.Name, nil)
	if err != nil {
		log.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	const rounds = 10
	batch := idx.NewBatch()
	for round := 0; round < rounds; round++ {
		if err = idx.Batch(batch); err != nil {
			t.Error(err)
		}
		batch.Reset()
	}
}
// TestBatchRaceBug1149 runs the shared testBatchRaceBug1149 scenario against
// an index created with New.
func TestBatchRaceBug1149(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	testBatchRaceBug1149(t, idx)
}
// TestBatchRaceBug1149Scorch runs the shared testBatchRaceBug1149 scenario
// against an index created with "scorch" as both index type and KV store.
func TestBatchRaceBug1149Scorch(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := NewUsing(tmpIndexPath, NewIndexMapping(), "scorch", "scorch", nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	testBatchRaceBug1149(t, idx)
}
// testBatchRaceBug1149 executes a batch containing a single delete, resets
// it, and then executes and resets the now empty batch once more. Any error
// returned by Batch fails the test immediately.
func testBatchRaceBug1149(t *testing.T, i Index) {
	b := i.NewBatch()
	b.Delete("1")
	// err is scoped to each check so this helper neither reads nor writes
	// any variable declared outside of it
	if err := i.Batch(b); err != nil {
		t.Fatal(err)
	}
	b.Reset()
	if err := i.Batch(b); err != nil {
		t.Fatal(err)
	}
	b.Reset()
}
// TestOptimisedConjunctionSearchHits runs the same conjunction query with
// scoring disabled ("none") and with default scoring, and expects both
// searches to report the same number of hits.
func TestOptimisedConjunctionSearchHits(t *testing.T) {
	// scorch.OptimizeDisjunctionUnadorned is switched off for the duration of
	// this test and restored to true afterwards
	scorch.OptimizeDisjunctionUnadorned = false
	defer func() {
		scorch.OptimizeDisjunctionUnadorned = true
	}()

	// use the shared temp-path helpers instead of a fixed relative directory
	// so the test cannot collide with other tests or leftover state
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := NewUsing(tmpIndexPath, NewIndexMapping(), "scorch", "scorch", nil)
	if err != nil {
		t.Fatal(err)
	}
	// deferred so the index is closed on every exit path, and before the
	// temp path is removed
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	docs := []struct {
		id   string
		data map[string]interface{}
	}{
		{"a", map[string]interface{}{
			"country":    "united",
			"name":       "Mercure Hotel",
			"directions": "B560 and B56 Follow signs to the M56",
		}},
		{"b", map[string]interface{}{
			"country":    "united",
			"name":       "Mercure Altrincham Bowdon Hotel",
			"directions": "A570 and A57 Follow signs to the M56 Manchester Airport",
		}},
		{"c", map[string]interface{}{
			"country":    "india united",
			"name":       "Sonoma Hotel",
			"directions": "Northwest",
		}},
		{"d", map[string]interface{}{
			"country":    "United Kingdom",
			"name":       "Cresta Court Hotel",
			"directions": "junction of A560 and A56",
		}},
	}

	b := idx.NewBatch()
	for _, doc := range docs {
		if err = b.Index(doc.id, doc.data); err != nil {
			t.Error(err)
		}
	}

	// execute the batch; t.Fatal (rather than log.Fatal) keeps the deferred
	// cleanup running and only aborts this test
	err = idx.Batch(b)
	if err != nil {
		t.Fatal(err)
	}

	mq := NewMatchQuery("united")
	mq.SetField("country")

	cq := NewConjunctionQuery(mq)

	mq1 := NewMatchQuery("hotel")
	mq1.SetField("name")
	cq.AddQuery(mq1)

	mq2 := NewMatchQuery("56")
	mq2.SetField("directions")
	mq2.SetFuzziness(1)
	cq.AddQuery(mq2)

	req := NewSearchRequest(cq)
	req.Score = "none"

	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	hitsWithOutScore := res.Total

	req = NewSearchRequest(cq)
	req.Score = ""

	res, err = idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	hitsWithScore := res.Total

	if hitsWithOutScore != hitsWithScore {
		t.Errorf("expected %d hits without score, got %d", hitsWithScore, hitsWithOutScore)
	}
}
// TestIndexMappingDocValuesDynamic checks that DocValuesDynamic, once set to
// false, stays false after the mapping is round-tripped through JSON with an
// unrelated setting edited.
func TestIndexMappingDocValuesDynamic(t *testing.T) {
	im := NewIndexMapping()
	// DocValuesDynamic's default is true
	// Now explicitly set it to false
	im.DocValuesDynamic = false

	// Next, retrieve the JSON dump of the index mapping
	//
	// data and err are declared here so the test does not rely on any
	// variable declared outside this function
	data, err := json.Marshal(im)
	if err != nil {
		t.Fatal(err)
	}

	// Now, edit an unrelated setting in the index mapping
	var m map[string]interface{}
	err = json.Unmarshal(data, &m)
	if err != nil {
		t.Fatal(err)
	}
	m["index_dynamic"] = false
	data, err = json.Marshal(m)
	if err != nil {
		t.Fatal(err)
	}

	// Unmarshal back the changes into the index mapping struct
	if err = im.UnmarshalJSON(data); err != nil {
		t.Fatal(err)
	}

	// Expect DocValuesDynamic to remain false!
	if im.DocValuesDynamic {
		t.Fatalf("Expected DocValuesDynamic to remain false after the index mapping edit")
	}
}
// TestCopyIndex populates an index through single operations and a batch,
// verifies its contents, copies it with IndexCopyable.CopyTo, and then
// repeats the document, field-value and field-list assertions on the copy.
func TestCopyIndex(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	doca := map[string]interface{}{
		"name": "tester",
		"desc": "gophercon india testing",
	}
	err = idx.Index("a", doca)
	if err != nil {
		t.Error(err)
	}

	// doc "y" is indexed and deleted again right away
	docy := map[string]interface{}{
		"name": "jasper",
		"desc": "clojure",
	}
	err = idx.Index("y", docy)
	if err != nil {
		t.Error(err)
	}
	err = idx.Delete("y")
	if err != nil {
		t.Error(err)
	}

	docx := map[string]interface{}{
		"name": "rose",
		"desc": "xoogler",
	}
	err = idx.Index("x", docx)
	if err != nil {
		t.Error(err)
	}

	err = idx.SetInternal([]byte("status"), []byte("pending"))
	if err != nil {
		t.Error(err)
	}

	// the batch adds doc "b", removes doc "x", sets one internal key
	// and deletes another
	docb := map[string]interface{}{
		"name": "sree",
		"desc": "cbft janitor",
	}
	batch := idx.NewBatch()
	err = batch.Index("b", docb)
	if err != nil {
		t.Error(err)
	}
	batch.Delete("x")
	batch.SetInternal([]byte("batchi"), []byte("batchv"))
	batch.DeleteInternal([]byte("status"))
	err = idx.Batch(batch)
	if err != nil {
		t.Error(err)
	}

	val, err := idx.GetInternal([]byte("batchi"))
	if err != nil {
		t.Error(err)
	}
	if string(val) != "batchv" {
		t.Errorf("expected 'batchv', got '%s'", val)
	}
	val, err = idx.GetInternal([]byte("status"))
	if err != nil {
		t.Error(err)
	}
	if val != nil {
		t.Errorf("expected nil, got '%s'", val)
	}

	err = idx.SetInternal([]byte("seqno"), []byte("7"))
	if err != nil {
		t.Error(err)
	}
	err = idx.SetInternal([]byte("status"), []byte("ready"))
	if err != nil {
		t.Error(err)
	}
	err = idx.DeleteInternal([]byte("status"))
	if err != nil {
		t.Error(err)
	}
	val, err = idx.GetInternal([]byte("status"))
	if err != nil {
		t.Error(err)
	}
	if val != nil {
		t.Errorf("expected nil, got '%s'", val)
	}

	val, err = idx.GetInternal([]byte("seqno"))
	if err != nil {
		t.Error(err)
	}
	if string(val) != "7" {
		t.Errorf("expected '7', got '%s'", val)
	}

	// only docs "a" and "b" remain at this point
	count, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if count != 2 {
		t.Errorf("expected doc count 2, got %d", count)
	}

	doc, err := idx.Document("a")
	if err != nil {
		t.Fatal(err)
	}

	foundNameField := false
	doc.VisitFields(func(field index.Field) {
		if field.Name() == "name" && string(field.Value()) == "tester" {
			foundNameField = true
		}
	})
	if !foundNameField {
		t.Errorf("expected to find field named 'name' with value 'tester'")
	}

	fields, err := idx.Fields()
	if err != nil {
		t.Fatal(err)
	}
	expectedFields := map[string]bool{
		"_all": false,
		"name": false,
		"desc": false,
	}
	if len(fields) < len(expectedFields) {
		t.Fatalf("expected %d fields got %d", len(expectedFields), len(fields))
	}
	for _, f := range fields {
		expectedFields[f] = true
	}
	for ef, efp := range expectedFields {
		if !efp {
			t.Errorf("field %s is missing", ef)
		}
	}

	// now create a copy of the index, and repeat assertions on it
	copyableIndex, ok := idx.(IndexCopyable)
	if !ok {
		t.Fatal("index doesn't support copy")
	}
	backupIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, backupIndexPath)

	err = copyableIndex.CopyTo(FileSystemDirectory(backupIndexPath))
	if err != nil {
		t.Fatalf("error copying the index: %v", err)
	}

	// open the copied index
	idxCopied, err := Open(backupIndexPath)
	if err != nil {
		t.Fatalf("unable to open copy index: %v", err)
	}
	defer func() {
		err := idxCopied.Close()
		if err != nil {
			t.Fatalf("error closing copy index: %v", err)
		}
	}()

	// assertions on copied index
	copyCount, err := idxCopied.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if copyCount != 2 {
		t.Errorf("expected doc count 2, got %d", copyCount)
	}

	copyDoc, err := idxCopied.Document("a")
	if err != nil {
		t.Fatal(err)
	}

	copyFoundNameField := false
	copyDoc.VisitFields(func(field index.Field) {
		if field.Name() == "name" && string(field.Value()) == "tester" {
			copyFoundNameField = true
		}
	})
	if !copyFoundNameField {
		t.Errorf("expected copy index to find field named 'name' with value 'tester'")
	}

	// the field list must come from the copy, not from the source index,
	// otherwise this assertion only re-checks the original
	copyFields, err := idxCopied.Fields()
	if err != nil {
		t.Fatal(err)
	}
	copyExpectedFields := map[string]bool{
		"_all": false,
		"name": false,
		"desc": false,
	}
	if len(copyFields) < len(copyExpectedFields) {
		t.Fatalf("expected %d fields got %d", len(copyExpectedFields), len(copyFields))
	}
	for _, f := range copyFields {
		copyExpectedFields[f] = true
	}
	for ef, efp := range copyExpectedFields {
		if !efp {
			t.Errorf("copy field %s is missing", ef)
		}
	}
}
// TestFuzzyScoring indexes the terms "ab", "abc" and "abcd" and runs a fuzzy
// query for "ab" with fuzziness 2, expecting the score of the hit at rank i
// (0-based) to equal the top score divided by i+1.
func TestFuzzyScoring(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	mp := NewIndexMapping()
	mp.DefaultAnalyzer = "simple"
	idx, err := New(tmpIndexPath, mp)
	if err != nil {
		t.Fatal(err)
	}
	// deferred so the index is also closed when the test fails part way
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	batch := idx.NewBatch()

	docs := []map[string]interface{}{
		{
			"textField": "ab",
		},
		{
			"textField": "abc",
		},
		{
			"textField": "abcd",
		},
	}

	// each document uses its own text as document id
	for _, doc := range docs {
		err := batch.Index(fmt.Sprintf("%v", doc["textField"]), doc)
		if err != nil {
			t.Fatal(err)
		}
	}

	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	query := NewFuzzyQuery("ab")
	query.Fuzziness = 2
	searchRequest := NewSearchRequestOptions(query, 10, 0, true)
	res, err := idx.Search(searchRequest)
	if err != nil {
		// the result must not be touched after a failed search
		t.Fatal(err)
	}
	if len(res.Hits) == 0 {
		// guards the res.Hits[0] access below
		t.Fatal("expected at least one hit for the fuzzy query")
	}

	maxScore := res.Hits[0].Score
	for i, hit := range res.Hits {
		if maxScore/float64(i+1) != hit.Score {
			t.Errorf("expected score - %f, got score - %f", maxScore/float64(i+1), hit.Score)
		}
	}
}
================================================
FILE: index_update.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/mapping"
index "github.com/blevesearch/bleve_index_api"
)
// Store all the fields that interact with the data
// from a document path
type pathInfo struct {
	// field mappings defined at this document path, across all of the
	// document mappings that were consolidated into it
	fieldMapInfo []*fieldMapInfo
	// true when a mapping at this path is dynamic and the index mapping
	// has index dynamic enabled
	dynamic bool
	// document path this entry describes; "" for the root of a mapping
	path string
	// analyzer name the index mapping reports for this path
	analyser string
	// path of the parent entry; left empty when there is no parent
	parentPath string
}
// Store the field information with respect to the
// document paths
type fieldMapInfo struct {
	// field mapping as defined in the index mapping
	fieldMapping *mapping.FieldMapping
	// analyzer name resolved for this field; filled in for "text" fields
	// when the analysers in use are compared
	analyzer string
	// datetime parser name resolved for this field; filled in for
	// "datetime" fields when the datetime parsers in use are compared
	datetimeParser string
	// name of the type mapping this field was reached from; "" when it
	// comes from the default mapping
	rootName string
	// document path entry this field mapping belongs to
	parent *pathInfo
}
// DeletedFields compares the original and the updated index mapping and
// returns, keyed by field, the changes that have to be applied to go from one
// to the other. Entries without any change are dropped from the result. An
// error is returned as soon as a change is found that cannot be applied.
func DeletedFields(ori, upd *mapping.IndexMappingImpl) (map[string]*index.UpdateFieldInfo, error) {
	// The top level settings of both index mappings have to be compatible
	if err := compareMappings(ori, upd); err != nil {
		return nil, err
	}

	// Neither the type mappings nor the default mapping of the updated
	// index mapping may contain mappings unknown to the original
	for name, updDMapping := range upd.TypeMapping {
		if err := checkUpdatedMapping(ori.TypeMapping[name], updDMapping); err != nil {
			return nil, err
		}
	}
	if err := checkUpdatedMapping(ori.DefaultMapping, upd.DefaultMapping); err != nil {
		return nil, err
	}

	// collectPaths walks every type mapping followed by the default mapping
	// of an index mapping and consolidates them by document path
	collectPaths := func(im *mapping.IndexMappingImpl) map[string]*pathInfo {
		paths := make(map[string]*pathInfo)
		for name, dMapping := range im.TypeMapping {
			addPathInfo(paths, "", dMapping, im, nil, name)
		}
		addPathInfo(paths, "", im.DefaultMapping, im, nil, "")
		return paths
	}
	oriPaths := collectPaths(ori)
	updPaths := collectPaths(upd)

	// Custom analysis components that are in use must not have changed
	if err := compareCustomComponents(oriPaths, updPaths, ori, upd); err != nil {
		return nil, err
	}

	// Walk the original document paths and record the index, docvalues
	// and store differences of every field against the updated mapping
	fieldInfo := make(map[string]*index.UpdateFieldInfo, len(oriPaths))
	for path, oriInfo := range oriPaths {
		if err := addFieldInfo(fieldInfo, oriInfo, updPaths[path]); err != nil {
			return nil, err
		}
	}

	for name, info := range fieldInfo {
		if info.Deleted {
			// A field cannot be completely deleted with any dynamic value turned on
			switch {
			case upd.IndexDynamic:
				return nil, fmt.Errorf("mapping cannot be removed when index dynamic is true")
			case upd.StoreDynamic:
				return nil, fmt.Errorf("mapping cannot be removed when store dynamic is true")
			case upd.DocValuesDynamic:
				return nil, fmt.Errorf("mapping cannot be removed when docvalues dynamic is true")
			}
			continue
		}
		// Drop entries that carry no change at all
		if !info.Index && !info.DocValues && !info.Store {
			delete(fieldInfo, name)
		}
	}
	return fieldInfo, nil
}
// compareMappings verifies the top level settings of the two index mappings
// and returns an error describing the first setting whose change is not
// allowed, or nil if all of them are compatible.
func compareMappings(ori, upd *mapping.IndexMappingImpl) error {
	hasTypeMappings := len(ori.TypeMapping) != 0 || len(upd.TypeMapping) != 0

	switch {
	case ori.TypeField != upd.TypeField && hasTypeMappings:
		return fmt.Errorf("type field cannot be changed when type mappings are present")
	case ori.DefaultType != upd.DefaultType:
		return fmt.Errorf("default type cannot be changed")
	case ori.IndexDynamic != upd.IndexDynamic:
		return fmt.Errorf("index dynamic cannot be changed")
	case ori.StoreDynamic != upd.StoreDynamic:
		return fmt.Errorf("store dynamic cannot be changed")
	case ori.DocValuesDynamic != upd.DocValuesDynamic:
		return fmt.Errorf("docvalues dynamic cannot be changed")
	case ori.DefaultAnalyzer != upd.DefaultAnalyzer && upd.IndexDynamic:
		return fmt.Errorf("default analyser cannot be changed if index dynamic is true")
	case ori.DefaultDateTimeParser != upd.DefaultDateTimeParser && upd.IndexDynamic:
		return fmt.Errorf("default datetime parser cannot be changed if index dynamic is true")
	}

	if ori.ScoringModel == upd.ScoringModel {
		return nil
	}
	// Scoring model changes between "", "tf-idf" and "bm25" require no index
	// changes to be made; any other value on either side is rejected
	switchable := func(model string) bool {
		return model == "" || model == index.TFIDFScoring || model == index.BM25Scoring
	}
	if !switchable(ori.ScoringModel) || !switchable(upd.ScoringModel) {
		return fmt.Errorf("scoring model can only be changed between \"\", %q and %q", index.TFIDFScoring, index.BM25Scoring)
	}
	return nil
}
// checkUpdatedMapping verifies that the updated document mapping does not
// introduce child mappings or field mappings that are missing from the
// original one, and that the nested property is unchanged.
func checkUpdatedMapping(ori, upd *mapping.DocumentMapping) error {
	// A missing or disabled updated mapping cannot introduce anything new
	if upd == nil || !upd.Enabled {
		return nil
	}
	// An enabled updated mapping without an original counterpart is new
	if ori == nil {
		return fmt.Errorf("updated index mapping contains new properties")
	}
	if ori.Nested != upd.Nested {
		return fmt.Errorf("nested property cannot be changed")
	}

	// Recursively go through the child mappings
	for name, updChild := range upd.Properties {
		if err := checkUpdatedMapping(ori.Properties[name], updChild); err != nil {
			return err
		}
	}

	// Every field name of the updated mapping has to exist in the original;
	// the names are collected into a set first for constant time lookups
	known := make(map[string]struct{}, len(ori.Fields))
	for _, oriField := range ori.Fields {
		known[oriField.Name] = struct{}{}
	}
	for _, updField := range upd.Fields {
		if _, ok := known[updField.Name]; !ok {
			return fmt.Errorf("updated index mapping contains new fields")
		}
	}
	return nil
}
// addPathInfo adds all of the field mappings of mp, and recursively of its
// child mappings, to paths while maintaining a tree of the document structure.
// This makes traversal and verification possible in case of multiple mappings
// defined for a single field, or multiple document fields' data getting
// written to a single zapx field.
//
// paths is keyed by document path and is updated in place; name is the
// document path of mp ("" for the root of a mapping); im is the index mapping
// mp belongs to; parent is the entry of the enclosing document mapping (nil
// at the root); rootName is recorded on every field mapping added.
func addPathInfo(paths map[string]*pathInfo, name string, mp *mapping.DocumentMapping,
	im *mapping.IndexMappingImpl, parent *pathInfo, rootName string) {
	// Early exit if the mapping is missing or has been disabled.
	// Comparisons later on will be done with a nil object.
	// checkUpdatedMapping accepts nil document mappings, so they have to be
	// tolerated here as well instead of being dereferenced.
	if mp == nil || !mp.Enabled {
		return
	}

	// Consolidate path information like index dynamic across multiple
	// mappings if path is the same
	pInfo, ok := paths[name]
	if !ok {
		pInfo = &pathInfo{
			analyser: im.AnalyzerNameForPath(name),
		}
	}
	// A path is dynamic if any of the mappings consolidated into it is
	// dynamic, provided the index mapping has index dynamic enabled
	pInfo.dynamic = (pInfo.dynamic || mp.Dynamic) && im.IndexDynamic
	pInfo.path = name
	if parent != nil {
		pInfo.parentPath = parent.path
	}

	// Recursively add path information for all child mappings
	for cName, cMapping := range mp.Properties {
		pathName := cName
		if name != "" {
			pathName = name + "." + cName
		}
		addPathInfo(paths, pathName, cMapping, im, pInfo, rootName)
	}

	// Add field mapping information keeping the document structure intact
	for _, fMap := range mp.Fields {
		pInfo.fieldMapInfo = append(pInfo.fieldMapInfo, &fieldMapInfo{
			fieldMapping: fMap,
			rootName:     rootName,
			parent:       pInfo,
		})
	}

	paths[name] = pInfo
}
// compareCustomComponents runs every custom analysis comparison in turn and
// returns the first error reported by any of them.
func compareCustomComponents(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
	// Analysers currently in use
	if err := compareAnalysers(oriPaths, updPaths, ori, upd); err != nil {
		return err
	}
	// Datetime parsers currently in use
	if err := compareDateTimeParsers(oriPaths, updPaths, ori, upd); err != nil {
		return err
	}
	// Synonym sources
	if err := compareSynonymSources(ori, upd); err != nil {
		return err
	}
	// Char filters, tokenizers, token filters and token maps
	if err := compareAnalyserSubcomponents(ori, upd); err != nil {
		return err
	}
	return nil
}
// compareAnalysers compares the custom analysers that text fields resolve to
// in the original and in the updated index mapping.
// Analysers without a custom analysis definition are not compared, and
// neither are custom analysers that no text field resolves to.
// As a side effect the resolved analyser name is recorded on every text field.
func compareAnalysers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
	// collectInUse records the analyser name on each text field and returns
	// the definitions of the custom analysers those names refer to
	collectInUse := func(paths map[string]*pathInfo, custom map[string]map[string]interface{},
		im *mapping.IndexMappingImpl) map[string]interface{} {
		inUse := make(map[string]interface{})
		for path, info := range paths {
			for _, fInfo := range info.fieldMapInfo {
				if fInfo.fieldMapping.Type != "text" {
					continue
				}
				fInfo.analyzer = im.AnalyzerNameForPath(path)
				if definition, ok := custom[fInfo.analyzer]; ok {
					inUse[fInfo.analyzer] = definition
				}
			}
		}
		return inUse
	}

	oriAnalyzers := collectInUse(oriPaths, ori.CustomAnalysis.Analyzers, ori)
	updAnalyzers := collectInUse(updPaths, upd.CustomAnalysis.Analyzers, upd)

	for name, anUpd := range updAnalyzers {
		anOri, ok := oriAnalyzers[name]
		if !ok {
			return fmt.Errorf("analyser %s newly added to an existing field", name)
		}
		if !reflect.DeepEqual(anUpd, anOri) {
			return fmt.Errorf("analyser %s changed while being used by fields", name)
		}
	}
	return nil
}
// compareDateTimeParsers compares the custom datetime parsers that datetime
// fields resolve to in the original and in the updated index mapping.
// Custom datetime parsers that no datetime field resolves to are not compared.
// As a side effect the resolved parser name is recorded on every datetime field.
func compareDateTimeParsers(oriPaths, updPaths map[string]*pathInfo, ori, upd *mapping.IndexMappingImpl) error {
	// collectInUse records the parser name on each datetime field, falling
	// back to the default datetime parser of the index mapping when the field
	// names none, and returns the custom parser definitions in use
	collectInUse := func(paths map[string]*pathInfo, custom map[string]map[string]interface{},
		im *mapping.IndexMappingImpl) map[string]interface{} {
		inUse := make(map[string]interface{})
		for _, info := range paths {
			for _, fInfo := range info.fieldMapInfo {
				if fInfo.fieldMapping.Type != "datetime" {
					continue
				}
				name := fInfo.fieldMapping.DateFormat
				if name == "" {
					name = im.DefaultDateTimeParser
				}
				fInfo.datetimeParser = name
				if definition, ok := custom[name]; ok {
					inUse[name] = definition
				}
			}
		}
		return inUse
	}

	oriDateTimeParsers := collectInUse(oriPaths, ori.CustomAnalysis.DateTimeParsers, ori)
	updDateTimeParsers := collectInUse(updPaths, upd.CustomAnalysis.DateTimeParsers, upd)

	for name, dtUpd := range updDateTimeParsers {
		dtOri, ok := oriDateTimeParsers[name]
		if !ok {
			return fmt.Errorf("datetime parser %s added to an existing field", name)
		}
		if !reflect.DeepEqual(dtUpd, dtOri) {
			return fmt.Errorf("datetime parser %s changed while being used by fields", name)
		}
	}
	return nil
}
// compareSynonymSources reports an error unless the synonym sources of both
// index mappings are deeply equal.
// Synonym sources currently not in use are also compared
func compareSynonymSources(ori, upd *mapping.IndexMappingImpl) error {
	unchanged := reflect.DeepEqual(ori.CustomAnalysis.SynonymSources, upd.CustomAnalysis.SynonymSources)
	if unchanged {
		return nil
	}
	return fmt.Errorf("synonym sources cannot be changed")
}
// compareAnalyserSubcomponents reports an error for the first of char
// filters, token filters, token maps and tokenizers (checked in that order)
// that is not deeply equal between the two index mappings.
// Components not currently in use are also compared
func compareAnalyserSubcomponents(ori, upd *mapping.IndexMappingImpl) error {
	oriCA, updCA := ori.CustomAnalysis, upd.CustomAnalysis

	components := []struct {
		label    string
		ori, upd interface{}
	}{
		{"char filters", oriCA.CharFilters, updCA.CharFilters},
		{"token filters", oriCA.TokenFilters, updCA.TokenFilters},
		{"token maps", oriCA.TokenMaps, updCA.TokenMaps},
		{"tokenizers", oriCA.Tokenizers, updCA.Tokenizers},
	}
	for _, component := range components {
		if !reflect.DeepEqual(component.ori, component.upd) {
			return fmt.Errorf("%s cannot be changed", component.label)
		}
	}
	return nil
}
// addFieldInfo compares every field mapping found at one document path of
// the original mapping with its counterpart in the updated mapping and hands
// the outcome to validateFieldInfo together with fInfo.
// A nil upd is treated as a deleted or disabled mapping. Checks for ori being
// nil, or for upd having mappings that are not in ori, have already been done
// before this stage.
func addFieldInfo(fInfo map[string]*index.UpdateFieldInfo, ori, upd *pathInfo) error {
	if upd != nil && upd.dynamic && ori.analyser != upd.analyser {
		return fmt.Errorf("analyser has been changed for a dynamic mapping")
	}

	for _, oriField := range ori.fieldMapInfo {
		var (
			updFMap           *mapping.FieldMapping
			updAnalyser       string
			updDatetimeParser string
		)

		// For multiple fields at a single document path, compare
		// only with the matching ones. Without an updated path entry
		// nothing can match and the field is compared against nil.
		if upd != nil {
			for _, updField := range upd.fieldMapInfo {
				if oriField.rootName != updField.rootName ||
					oriField.fieldMapping.Name != updField.fieldMapping.Name {
					continue
				}
				updFMap = updField.fieldMapping
				switch updFMap.Type {
				case "text":
					updAnalyser = updField.analyzer
				case "datetime":
					updDatetimeParser = updField.datetimeParser
				}
			}
		}

		// Compare analyser and datetime parser before comparing the
		// field mapping as it might not have this information
		if updAnalyser != "" && oriField.analyzer != updAnalyser {
			return fmt.Errorf("analyser has been changed for a text field")
		}
		if updDatetimeParser != "" && oriField.datetimeParser != updDatetimeParser {
			return fmt.Errorf("datetime parser has been changed for a date time field")
		}

		info, err := compareFieldMapping(oriField.fieldMapping, updFMap)
		if err != nil {
			return err
		}

		// Validate to ensure change is possible
		// Needed if multiple mappings are aliased to the same field
		if err = validateFieldInfo(info, fInfo, ori, oriField); err != nil {
			return err
		}
	}
	return nil
}
// compareFieldMapping diffs an original field mapping against its updated
// counterpart and describes the change as an index.UpdateFieldInfo.
//
// A nil updated mapping means the field was removed; this is reported as a
// deletion unless the original field is included in '_all', which is an
// error. Passing a nil original is always an error.
//
// When both mappings are present, a change to the type, to the type specific
// settings (analyzer for text, dateFormat for datetime, and dims, similarity
// or vectorIndexOptimizedFor for vector and vector_base64), to includeInAll,
// includeTermVectors or skipFreqNorm is rejected. store, index and docvalues
// may only change from true to false, and only when the field is not
// included in '_all'; a change of index also flags docvalues.
//
// On success the returned info has no flags set when nothing relevant
// changed. On failure the info is nil and the error says why the update is
// not possible.
func compareFieldMapping(original, updated *mapping.FieldMapping) (*index.UpdateFieldInfo, error) {
	switch {
	case original == nil && updated == nil:
		return nil, fmt.Errorf("both field mappings cannot be nil")
	case original == nil:
		return nil, fmt.Errorf("matching field not found in original index mapping")
	case updated == nil:
		if original.IncludeInAll {
			return nil, fmt.Errorf("deleted field present in '_all' field")
		}
		return &index.UpdateFieldInfo{Deleted: true}, nil
	}

	if original.Type != updated.Type {
		return nil, fmt.Errorf("field type cannot be updated")
	}

	// Settings that only matter for a particular field type.
	switch original.Type {
	case "text":
		if original.Analyzer != updated.Analyzer {
			return nil, fmt.Errorf("analyzer cannot be updated for text fields")
		}
	case "datetime":
		if original.DateFormat != updated.DateFormat {
			return nil, fmt.Errorf("dateFormat cannot be updated for datetime fields")
		}
	case "vector", "vector_base64":
		if original.Dims != updated.Dims {
			return nil, fmt.Errorf("dimensions cannot be updated for vector and vector_base64 fields")
		}
		if original.Similarity != updated.Similarity {
			return nil, fmt.Errorf("similarity cannot be updated for vector and vector_base64 fields")
		}
		if original.VectorIndexOptimizedFor != updated.VectorIndexOptimizedFor {
			return nil, fmt.Errorf("vectorIndexOptimizedFor cannot be updated for vector and vector_base64 fields")
		}
	}

	// Settings that can never change, whatever the field type.
	if original.IncludeInAll != updated.IncludeInAll {
		return nil, fmt.Errorf("includeInAll cannot be changed")
	}
	if original.IncludeTermVectors != updated.IncludeTermVectors {
		return nil, fmt.Errorf("includeTermVectors cannot be changed")
	}
	if original.SkipFreqNorm != updated.SkipFreqNorm {
		return nil, fmt.Errorf("skipFreqNorm cannot be changed")
	}

	info := &index.UpdateFieldInfo{}

	// store: only true -> false, and only outside of '_all'.
	if original.Store != updated.Store {
		switch {
		case updated.Store:
			return nil, fmt.Errorf("store cannot be changed from false to true")
		case updated.IncludeInAll:
			return nil, fmt.Errorf("store cannot be changed if field present in `_all' field")
		default:
			info.Store = true
		}
	}

	// index: only true -> false, and only outside of '_all'. Dropping the
	// index flags docvalues as well.
	if original.Index != updated.Index {
		switch {
		case updated.Index:
			return nil, fmt.Errorf("index cannot be changed from false to true")
		case updated.IncludeInAll:
			return nil, fmt.Errorf("index cannot be changed if field present in `_all' field")
		default:
			info.Index = true
			info.DocValues = true
		}
	}

	// docvalues: only true -> false, and only outside of '_all'.
	if original.DocValues != updated.DocValues {
		switch {
		case updated.DocValues:
			return nil, fmt.Errorf("docvalues cannot be changed from false to true")
		case updated.IncludeInAll:
			return nil, fmt.Errorf("docvalues cannot be changed if field present in `_all' field")
		default:
			info.DocValues = true
		}
	}

	return info, nil
}
// validateFieldInfo records newInfo in fInfo under the full name of the field
// described by oriFMapInfo, after checking that the change is acceptable.
//
// The change is rejected when it sets any flag while the original path is
// dynamic, or when a different change has already been recorded under the
// same field name (several mappings pointing at one field).
func validateFieldInfo(newInfo *index.UpdateFieldInfo, fInfo map[string]*index.UpdateFieldInfo,
	ori *pathInfo, oriFMapInfo *fieldMapInfo) error {
	// An unnamed field mapping takes the name of the path it sits on.
	name := oriFMapInfo.fieldMapping.Name
	if name == "" {
		name = oriFMapInfo.parent.path
	}
	// Qualify the name with the parent path, when there is one.
	if prefix := oriFMapInfo.parent.parentPath; prefix != "" {
		name = prefix + "." + name
	}

	hasChange := newInfo.Deleted || newInfo.Index || newInfo.DocValues || newInfo.Store
	if hasChange && ori.dynamic {
		return fmt.Errorf("updated field is under a dynamic property")
	}

	recorded, seen := fInfo[name]
	if !seen {
		fInfo[name] = newInfo
		return nil
	}
	if !reflect.DeepEqual(recorded, newInfo) {
		return fmt.Errorf("updated field impossible to verify because multiple mappings point to the same field name")
	}
	return nil
}
================================================
FILE: index_update_test.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"encoding/json"
"fmt"
"math/rand"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/analysis/datetime/percent"
"github.com/blevesearch/bleve/v2/analysis/datetime/sanitized"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/letter"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/scorch/mergeplan"
"github.com/blevesearch/bleve/v2/mapping"
index "github.com/blevesearch/bleve_index_api"
)
// TestCompareFieldMapping drives compareFieldMapping with pairs of field
// mappings and checks, for each pair, whether an error is reported and which
// index.UpdateFieldInfo is returned.
func TestCompareFieldMapping(t *testing.T) {
	tests := []struct {
		original       *mapping.FieldMapping
		updated        *mapping.FieldMapping
		indexFieldInfo *index.UpdateFieldInfo
		err            bool
	}{
		{ // both nil => error
			original:       nil,
			updated:        nil,
			indexFieldInfo: nil,
			err:            true,
		},
		{ // updated nil => delete all
			original: &mapping.FieldMapping{},
			updated:  nil,
			indexFieldInfo: &index.UpdateFieldInfo{
				Deleted: true,
			},
			err: false,
		},
		{ // updated nil while original is in _all => not updatable
			original: &mapping.FieldMapping{
				IncludeInAll: true,
			},
			updated:        nil,
			indexFieldInfo: nil,
			err:            true,
		},
		{ // type changed => not updatable
			original: &mapping.FieldMapping{
				Type: "text",
			},
			updated: &mapping.FieldMapping{
				Type: "datetime",
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // synonym source changed for text => updatable
			original: &mapping.FieldMapping{
				Type:          "text",
				SynonymSource: "a",
			},
			updated: &mapping.FieldMapping{
				Type:          "text",
				SynonymSource: "b",
			},
			indexFieldInfo: &index.UpdateFieldInfo{},
			err:            false,
		},
		{ // analyser changed for text => not updatable
			original: &mapping.FieldMapping{
				Type:     "text",
				Analyzer: "a",
			},
			updated: &mapping.FieldMapping{
				Type:     "text",
				Analyzer: "b",
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // dims changed for vector => not updatable
			original: &mapping.FieldMapping{
				Type:                    "vector",
				Dims:                    128,
				Similarity:              "l2_norm",
				VectorIndexOptimizedFor: "memory-efficient",
			},
			updated: &mapping.FieldMapping{
				Type:                    "vector",
				Dims:                    1024,
				Similarity:              "l2_norm",
				VectorIndexOptimizedFor: "memory-efficient",
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // similarity changed for vector_base64 => not updatable
			original: &mapping.FieldMapping{
				Type:                    "vector_base64",
				Similarity:              "l2_norm",
				Dims:                    128,
				VectorIndexOptimizedFor: "memory-efficient",
			},
			updated: &mapping.FieldMapping{
				Type:                    "vector_base64",
				Similarity:              "dot_product",
				Dims:                    128,
				VectorIndexOptimizedFor: "memory-efficient",
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // vectorIndexOptimizedFor changed for vector => not updatable
			original: &mapping.FieldMapping{
				Type:                    "vector",
				Similarity:              "dot_product",
				Dims:                    128,
				VectorIndexOptimizedFor: "memory-efficient",
			},
			updated: &mapping.FieldMapping{
				Type:                    "vector",
				Similarity:              "dot_product",
				Dims:                    128,
				VectorIndexOptimizedFor: "latency",
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // includeInAll changed => not updatable
			original: &mapping.FieldMapping{
				Type:         "numeric",
				IncludeInAll: true,
			},
			updated: &mapping.FieldMapping{
				Type:         "numeric",
				IncludeInAll: false,
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // includeTermVectors changed => not updatable
			original: &mapping.FieldMapping{
				Type:               "numeric",
				IncludeTermVectors: false,
			},
			updated: &mapping.FieldMapping{
				Type:               "numeric",
				IncludeTermVectors: true,
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // skipFreqNorm changed => not updatable
			original: &mapping.FieldMapping{
				Type:         "numeric",
				SkipFreqNorm: true,
			},
			updated: &mapping.FieldMapping{
				Type:         "numeric",
				SkipFreqNorm: false,
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // store changed from true to false => updatable with store delete
			original: &mapping.FieldMapping{
				Type:  "numeric",
				Store: true,
			},
			updated: &mapping.FieldMapping{
				Type:  "numeric",
				Store: false,
			},
			indexFieldInfo: &index.UpdateFieldInfo{
				Store: true,
			},
			err: false,
		},
		{ // store changed from false to true => not updatable
			original: &mapping.FieldMapping{
				Type:  "numeric",
				Store: false,
			},
			updated: &mapping.FieldMapping{
				Type:  "numeric",
				Store: true,
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // store changed from true to false while in _all => not updatable
			original: &mapping.FieldMapping{
				Type:         "numeric",
				Store:        true,
				IncludeInAll: true,
			},
			updated: &mapping.FieldMapping{
				Type:         "numeric",
				Store:        false,
				IncludeInAll: true,
			},
			indexFieldInfo: nil,
			err:            true,
		},
		{ // index changed after all checks => updatable with index and docvalues delete
			original: &mapping.FieldMapping{
				Type:  "geopoint",
				Index: true,
			},
			updated: &mapping.FieldMapping{
				Type:  "geopoint",
				Index: false,
			},
			indexFieldInfo: &index.UpdateFieldInfo{
				Index:     true,
				DocValues: true,
			},
			err: false,
		},
		{ // docvalues changed after all checks => docvalues delete
			original: &mapping.FieldMapping{
				Type:      "numeric",
				DocValues: true,
			},
			updated: &mapping.FieldMapping{
				Type:      "numeric",
				DocValues: false,
			},
			indexFieldInfo: &index.UpdateFieldInfo{
				DocValues: true,
			},
			err: false,
		},
		{ // no relevant changes => continue but no op
			// The analyzer and the vector settings differ, but they are only
			// compared for text and vector fields and this is a datetime field.
			original: &mapping.FieldMapping{
				Name:                    "",
				Type:                    "datetime",
				Analyzer:                "a",
				Store:                   true,
				Index:                   false,
				IncludeTermVectors:      true,
				IncludeInAll:            false,
				DateFormat:              "a",
				DocValues:               false,
				SkipFreqNorm:            true,
				Dims:                    128,
				Similarity:              "dot_product",
				VectorIndexOptimizedFor: "memory-efficient",
				SynonymSource:           "a",
			},
			updated: &mapping.FieldMapping{
				Name:                    "",
				Type:                    "datetime",
				Analyzer:                "b",
				Store:                   true,
				Index:                   false,
				IncludeTermVectors:      true,
				IncludeInAll:            false,
				DateFormat:              "a",
				DocValues:               false,
				SkipFreqNorm:            true,
				Dims:                    256,
				Similarity:              "l2_norm",
				VectorIndexOptimizedFor: "latency",
				SynonymSource:           "b",
			},
			indexFieldInfo: &index.UpdateFieldInfo{},
			err:            false,
		},
	}

	for i, test := range tests {
		rv, err := compareFieldMapping(test.original, test.updated)
		if (err != nil) != test.err {
			t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, err)
		}
		// DeepEqual already treats a nil pointer and a non-nil pointer as
		// different, so no separate nil checks are needed.
		if !reflect.DeepEqual(rv, test.indexFieldInfo) {
			t.Errorf("Unexpected index field info value for test %d, expecting %+v, got %+v, err %v", i, test.indexFieldInfo, rv, err)
		}
	}
}
// TestCompareMappings checks which top level index mapping changes are
// rejected by compareMappings and which are let through.
func TestCompareMappings(t *testing.T) {
	type testCase struct {
		original *mapping.IndexMappingImpl
		updated  *mapping.IndexMappingImpl
		err      bool
	}

	cases := []testCase{
		{ // type field changed while type mappings are present => error
			original: &mapping.IndexMappingImpl{
				TypeField:   "a",
				TypeMapping: map[string]*mapping.DocumentMapping{"a": {}, "b": {}},
			},
			updated: &mapping.IndexMappingImpl{
				TypeField:   "b",
				TypeMapping: map[string]*mapping.DocumentMapping{"a": {}, "b": {}},
			},
			err: true,
		},
		{ // default type changed => error
			original: &mapping.IndexMappingImpl{DefaultType: "a"},
			updated:  &mapping.IndexMappingImpl{DefaultType: "b"},
			err:      true,
		},
		{ // default analyzer changed => no error
			original: &mapping.IndexMappingImpl{DefaultAnalyzer: "a"},
			updated:  &mapping.IndexMappingImpl{DefaultAnalyzer: "b"},
			err:      false,
		},
		{ // default datetime parser changed => no error
			original: &mapping.IndexMappingImpl{DefaultDateTimeParser: "a"},
			updated:  &mapping.IndexMappingImpl{DefaultDateTimeParser: "b"},
			err:      false,
		},
		{ // default synonym source changed => no error
			original: &mapping.IndexMappingImpl{DefaultSynonymSource: "a"},
			updated:  &mapping.IndexMappingImpl{DefaultSynonymSource: "b"},
			err:      false,
		},
		{ // default field changed => no error
			original: &mapping.IndexMappingImpl{DefaultField: "a"},
			updated:  &mapping.IndexMappingImpl{DefaultField: "b"},
			err:      false,
		},
		{ // index dynamic changed => error
			original: &mapping.IndexMappingImpl{IndexDynamic: true},
			updated:  &mapping.IndexMappingImpl{IndexDynamic: false},
			err:      true,
		},
		{ // store dynamic changed => error
			original: &mapping.IndexMappingImpl{StoreDynamic: false},
			updated:  &mapping.IndexMappingImpl{StoreDynamic: true},
			err:      true,
		},
		{ // docvalues dynamic changed => error
			original: &mapping.IndexMappingImpl{DocValuesDynamic: true},
			updated:  &mapping.IndexMappingImpl{DocValuesDynamic: false},
			err:      true,
		},
	}

	for i, tc := range cases {
		err := compareMappings(tc.original, tc.updated)
		if gotErr := err != nil; gotErr != tc.err {
			t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, tc.err, err)
		}
	}
}
// TestCompareAnalysers checks that compareAnalysers accepts two mappings whose
// custom analyser "3xbla" is defined identically and rejects two mappings in
// which that analyser uses a different tokenizer.
func TestCompareAnalysers(t *testing.T) {
	fieldNames := []string{"a", "b", "c"}

	// buildMapping returns an index mapping with text fields "a", "b" and "c",
	// where "b" uses the custom analyser "3xbla" built on the given tokenizer
	// and "c" uses the simple analyser.
	buildMapping := func(tokenizer string) *mapping.IndexMappingImpl {
		im := mapping.NewIndexMapping()
		for _, name := range fieldNames {
			im.DefaultMapping.AddFieldMappingsAt(name, NewTextFieldMapping())
		}
		im.DefaultMapping.Properties["b"].DefaultAnalyzer = "3xbla"
		im.DefaultMapping.Properties["c"].DefaultAnalyzer = simple.Name
		config := map[string]interface{}{
			"type":          custom.Name,
			"tokenizer":     tokenizer,
			"token_filters": []interface{}{lowercase.Name, "stop_en"},
		}
		if err := im.AddCustomAnalyzer("3xbla", config); err != nil {
			t.Fatal(err)
		}
		return im
	}

	// buildPaths returns fresh path information holding one text field mapping
	// per field name.
	buildPaths := func() map[string]*pathInfo {
		paths := make(map[string]*pathInfo, len(fieldNames))
		for _, name := range fieldNames {
			paths[name] = &pathInfo{
				fieldMapInfo: []*fieldMapInfo{
					{fieldMapping: &mapping.FieldMapping{Type: "text"}},
				},
				dynamic:    false,
				path:       name,
				parentPath: "",
			}
		}
		return paths
	}

	// The same path information is used for both comparisons below.
	oriPaths := buildPaths()
	updPaths := buildPaths()

	// Identical analysers for every text field.
	ori := buildMapping(whitespace.Name)
	upd := buildMapping(whitespace.Name)
	if err := compareAnalysers(oriPaths, updPaths, ori, upd); err != nil {
		t.Errorf("Expected error to be nil, got %v", err)
	}

	// The custom analyser behind field "b" has a different tokenizer.
	ori2 := buildMapping(whitespace.Name)
	upd2 := buildMapping(letter.Name)
	if err := compareAnalysers(oriPaths, updPaths, ori2, upd2); err == nil {
		t.Errorf("Expected error, got nil")
	}
}
// TestCompareDatetimeParsers checks that compareDateTimeParsers accepts two
// mappings whose custom datetime parser "customDT" is defined identically and
// rejects two mappings in which that parser has different layouts.
func TestCompareDatetimeParsers(t *testing.T) {
	fieldNames := []string{"a", "b", "c"}

	// buildMapping returns an index mapping with datetime fields "a", "b" and
	// "c", where "b" uses the custom parser "customDT" and "c" uses the
	// percent parser. The second layout of "customDT" is configurable.
	buildMapping := func(secondLayout string) *mapping.IndexMappingImpl {
		im := mapping.NewIndexMapping()
		for _, name := range fieldNames {
			im.DefaultMapping.AddFieldMappingsAt(name, NewDateTimeFieldMapping())
		}
		im.DefaultMapping.Properties["b"].Fields[0].DateFormat = "customDT"
		im.DefaultMapping.Properties["c"].Fields[0].DateFormat = percent.Name
		config := map[string]interface{}{
			"type": sanitized.Name,
			"layouts": []interface{}{
				"02/01/2006 15:04:05",
				secondLayout,
			},
		}
		if err := im.AddCustomDateTimeParser("customDT", config); err != nil {
			t.Fatal(err)
		}
		return im
	}

	// buildPaths returns fresh path information holding one datetime field
	// mapping per field name; only "b" names a date format.
	buildPaths := func() map[string]*pathInfo {
		dateFormats := map[string]string{"b": "customDT"}
		paths := make(map[string]*pathInfo, len(fieldNames))
		for _, name := range fieldNames {
			paths[name] = &pathInfo{
				fieldMapInfo: []*fieldMapInfo{
					{
						fieldMapping: &mapping.FieldMapping{
							Type:       "datetime",
							DateFormat: dateFormats[name],
						},
					},
				},
				dynamic:    false,
				path:       name,
				parentPath: "",
			}
		}
		return paths
	}

	// The same path information is used for both comparisons below.
	oriPaths := buildPaths()
	updPaths := buildPaths()

	// Identical datetime parsers for every field.
	ori := buildMapping("2006/01/02 3:04PM")
	upd := buildMapping("2006/01/02 3:04PM")
	if err := compareDateTimeParsers(oriPaths, updPaths, ori, upd); err != nil {
		t.Fatalf("Expected error to be nil, got %v", err)
	}

	// The custom datetime parser behind field "b" has a different layout.
	ori2 := buildMapping("2006/01/02 3:04PM")
	upd2 := buildMapping("2006/01/02")
	if err := compareDateTimeParsers(oriPaths, updPaths, ori2, upd2); err == nil {
		t.Errorf("Expected error, got nil")
	}
}
// TestCompareSynonymSources checks that compareSynonymSources accepts two
// mappings with identical synonym sources and rejects two mappings in which
// the synonym source "syn2" points at a different collection.
func TestCompareSynonymSources(t *testing.T) {
	// buildMapping returns an index mapping with text fields "a" and "b",
	// "syn1" as the default synonym source of the default mapping and "syn2"
	// as the synonym source of field "b". The collection behind "syn2" is
	// configurable.
	buildMapping := func(syn2Collection string) *mapping.IndexMappingImpl {
		im := mapping.NewIndexMapping()
		im.DefaultMapping.AddFieldMappingsAt("a", NewTextFieldMapping())
		im.DefaultMapping.AddFieldMappingsAt("b", NewTextFieldMapping())
		im.DefaultMapping.DefaultSynonymSource = "syn1"
		im.DefaultMapping.Properties["b"].Fields[0].SynonymSource = "syn2"

		sources := []struct {
			name       string
			collection string
			analyzer   string
		}{
			{name: "syn1", collection: "col1", analyzer: simple.Name},
			{name: "syn2", collection: syn2Collection, analyzer: standard.Name},
		}
		for _, src := range sources {
			err := im.AddSynonymSource(src.name, map[string]interface{}{
				"collection": src.collection,
				"analyzer":   src.analyzer,
			})
			if err != nil {
				t.Fatal(err)
			}
		}
		return im
	}

	// Identical synonym sources on both sides.
	ori := buildMapping("col2")
	upd := buildMapping("col2")
	if err := compareSynonymSources(ori, upd); err != nil {
		t.Errorf("Expected error to be nil, got %v", err)
	}

	// "syn2" points at a different collection in the updated mapping.
	ori2 := buildMapping("col2")
	upd2 := buildMapping("col3")
	if err := compareSynonymSources(ori2, upd2); err == nil {
		t.Errorf("Expected error, got nil")
	}
}
func TestDeletedFields(t *testing.T) {
tests := []struct {
original *mapping.IndexMappingImpl
updated *mapping.IndexMappingImpl
fieldInfo map[string]*index.UpdateFieldInfo
err bool
}{
{
// changed default analyzer with index dynamic
// => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{},
DefaultAnalyzer: standard.Name,
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{},
DefaultAnalyzer: simple.Name,
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// changed default analyzer within a mapping with index dynamic
// => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: true,
DefaultAnalyzer: standard.Name,
},
DefaultAnalyzer: "",
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: true,
DefaultAnalyzer: simple.Name,
},
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// changed default datetime parser with index dynamic
// => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{},
DefaultDateTimeParser: percent.Name,
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{},
DefaultDateTimeParser: sanitized.Name,
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// no change between original and updated having type and default mapping
// => empty fieldInfo with no error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: map[string]*index.UpdateFieldInfo{},
err: false,
},
{
// no changes in type mappings and default mapping disabled with changes
// => empty fieldInfo with no error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: false,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: false,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"d": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: map[string]*index.UpdateFieldInfo{},
err: false,
},
{
// new type mappings in updated => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// new mappings in default mapping => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// fully removed mapping in type with some dynamic => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: true,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// index option removed from a field in a type mapping, nothing dynamic
// proper fieldInfo with no errors
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: map[string]*index.UpdateFieldInfo{
"b": {
Index: true,
DocValues: true,
},
},
err: false,
},
{
// two fields from diff paths with removed content matching
// => relevant fieldInfo
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: map[string]*index.UpdateFieldInfo{
"a": {
Index: true,
DocValues: true,
},
},
err: false,
},
{
// two fields from diff paths with removed content not matching
// => error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: nil,
err: true,
},
{
// two fields from the same path => relevant fieldInfo
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Name: "a",
Type: "numeric",
Index: true,
Store: true,
},
{
Name: "b",
Type: "numeric",
Index: true,
Store: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Name: "a",
Type: "numeric",
Index: false,
Store: true,
},
{
Name: "b",
Type: "numeric",
Index: true,
Store: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: map[string]*index.UpdateFieldInfo{
"a": {
Index: true,
DocValues: true,
},
"b": {
Store: true,
},
},
err: false,
},
{
// one store, one index, one docvalues and one all removed in type and default
// => relevant fieldInfo without error
original: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Store: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map3": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
DocValues: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"d": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: true,
Store: true,
DocValues: true,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
updated: &mapping.IndexMappingImpl{
TypeMapping: map[string]*mapping.DocumentMapping{
"map1": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"a": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Index: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map2": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"b": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
Store: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
"map3": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{
"c": {
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{
{
Type: "numeric",
DocValues: false,
},
},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
},
DefaultMapping: &mapping.DocumentMapping{
Enabled: true,
Dynamic: false,
Properties: map[string]*mapping.DocumentMapping{},
Fields: []*mapping.FieldMapping{},
DefaultAnalyzer: "",
DefaultSynonymSource: "",
},
IndexDynamic: false,
StoreDynamic: false,
DocValuesDynamic: false,
CustomAnalysis: NewIndexMapping().CustomAnalysis,
},
fieldInfo: map[string]*index.UpdateFieldInfo{
"a": {
Index: true,
DocValues: true,
},
"b": {
Store: true,
},
"c": {
DocValues: true,
},
"d": {
Deleted: true,
},
},
err: false,
},
}
for i, test := range tests {
info, err := DeletedFields(test.original, test.updated)
if err == nil && test.err || err != nil && !test.err {
t.Errorf("Unexpected error value for test %d, expecting %t, got %v\n", i, test.err, err)
}
if info == nil && test.fieldInfo != nil || info != nil && test.fieldInfo == nil || !reflect.DeepEqual(info, test.fieldInfo) {
t.Errorf("Unexpected default info value for test %d, expecting %+v, got %+v, err %v", i, test.fieldInfo, info, err)
}
}
}
// TestIndexUpdateText indexes three documents under a mapping with four text
// properties that are all indexed and stored, closes the index, and reopens it
// with a changed mapping supplied through the "updated_mapping" config key.
// It then asserts what a wildcard query on each property returns:
//
//   - "a" is unchanged: 3 hits, each carrying the requested field.
//   - "b" has Index switched off: 0 hits.
//   - "c" has Store switched off: 3 hits, but no field values returned.
//   - "d" is absent from the updated mapping: 0 hits.
func TestIndexUpdateText(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// textProperty builds the mapping of a single text property; only the
	// Index and Store options differ between the properties in this test.
	textProperty := func(indexed, stored bool) *mapping.DocumentMapping {
		return &mapping.DocumentMapping{
			Enabled:    true,
			Dynamic:    false,
			Properties: map[string]*mapping.DocumentMapping{},
			Fields: []*mapping.FieldMapping{
				{
					Type:  "text",
					Index: indexed,
					Store: stored,
				},
			},
			DefaultAnalyzer:      "standard",
			DefaultSynonymSource: "",
		}
	}
	// defaultMapping wraps a set of properties in the non-dynamic default
	// document mapping shared by the before and after index mappings.
	defaultMapping := func(props map[string]*mapping.DocumentMapping) *mapping.DocumentMapping {
		return &mapping.DocumentMapping{
			Enabled:              true,
			Dynamic:              false,
			Properties:           props,
			Fields:               []*mapping.FieldMapping{},
			DefaultAnalyzer:      "standard",
			DefaultSynonymSource: "",
		}
	}

	indexMappingBefore := mapping.NewIndexMapping()
	indexMappingBefore.TypeMapping = map[string]*mapping.DocumentMapping{}
	indexMappingBefore.DefaultMapping = defaultMapping(map[string]*mapping.DocumentMapping{
		"a": textProperty(true, true),
		"b": textProperty(true, true),
		"c": textProperty(true, true),
		"d": textProperty(true, true),
	})
	indexMappingBefore.IndexDynamic = false
	indexMappingBefore.StoreDynamic = false
	indexMappingBefore.DocValuesDynamic = false

	// The handle is named idx so that it does not shadow the index package.
	idx, err := New(tmpIndexPath, indexMappingBefore)
	if err != nil {
		t.Fatal(err)
	}

	doc1 := map[string]interface{}{"a": "xyz", "b": "abc", "c": "def", "d": "ghi"}
	doc2 := map[string]interface{}{"a": "uvw", "b": "rst", "c": "klm", "d": "pqr"}
	doc3 := map[string]interface{}{"a": "xyz", "b": "def", "c": "abc", "d": "mno"}
	batch := idx.NewBatch()
	err = batch.Index("001", doc1)
	if err != nil {
		t.Fatal(err)
	}
	err = batch.Index("002", doc2)
	if err != nil {
		t.Fatal(err)
	}
	err = batch.Index("003", doc3)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}
	// Close before reopening with the updated mapping.
	err = idx.Close()
	if err != nil {
		t.Fatal(err)
	}

	indexMappingAfter := mapping.NewIndexMapping()
	indexMappingAfter.TypeMapping = map[string]*mapping.DocumentMapping{}
	indexMappingAfter.DefaultMapping = defaultMapping(map[string]*mapping.DocumentMapping{
		"a": textProperty(true, true),
		"b": textProperty(false, true), // Index switched off
		"c": textProperty(true, false), // Store switched off
		// "d" intentionally omitted
	})
	indexMappingAfter.IndexDynamic = false
	indexMappingAfter.StoreDynamic = false
	indexMappingAfter.DocValuesDynamic = false

	mappingString, err := json.Marshal(indexMappingAfter)
	if err != nil {
		t.Fatal(err)
	}
	config := map[string]interface{}{
		"updated_mapping": string(mappingString),
	}
	idx, err = OpenUsing(tmpIndexPath, config)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	q1 := NewSearchRequest(NewQueryStringQuery("a:*"))
	q1.Fields = append(q1.Fields, "a")
	res1, err := idx.Search(q1)
	if err != nil {
		t.Fatal(err)
	}
	if len(res1.Hits) != 3 {
		t.Errorf("Expected 3 hits, got %d\n", len(res1.Hits))
	}
	// Guard the element access: with no hits, Hits[0] would panic and hide
	// the hit-count failure reported just above.
	if len(res1.Hits) > 0 && len(res1.Hits[0].Fields) != 1 {
		t.Errorf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields))
	}

	q2 := NewSearchRequest(NewQueryStringQuery("b:*"))
	q2.Fields = append(q2.Fields, "b")
	res2, err := idx.Search(q2)
	if err != nil {
		t.Fatal(err)
	}
	if len(res2.Hits) != 0 {
		t.Errorf("Expected 0 hits, got %d\n", len(res2.Hits))
	}

	q3 := NewSearchRequest(NewQueryStringQuery("c:*"))
	q3.Fields = append(q3.Fields, "c")
	res3, err := idx.Search(q3)
	if err != nil {
		t.Fatal(err)
	}
	if len(res3.Hits) != 3 {
		t.Errorf("Expected 3 hits, got %d\n", len(res3.Hits))
	}
	// Same guard as for res1: never index into an empty hit list.
	if len(res3.Hits) > 0 && len(res3.Hits[0].Fields) != 0 {
		t.Errorf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields))
	}

	q4 := NewSearchRequest(NewQueryStringQuery("d:*"))
	q4.Fields = append(q4.Fields, "d")
	res4, err := idx.Search(q4)
	if err != nil {
		t.Fatal(err)
	}
	if len(res4.Hits) != 0 {
		t.Errorf("Expected 0 hits, got %d\n", len(res4.Hits))
	}
}
// TestIndexUpdateSynonym builds an index whose text fields "a", "b" and "c"
// all use the same synonym source, indexes two synonym definitions and two
// documents, then reopens the index with an updated mapping in which "a" is
// kept as is, "b" has Index switched off, and "c" is left out. Searching for a
// term that appears only in a synonym definition is expected to match through
// "a" and to return nothing for "b" and "c".
func TestIndexUpdateSynonym(t *testing.T) {
	const (
		collectionName = "collection1"
		sourceName     = "english"

		// Every field of a document carries the same text.
		hardworkingText = `The hardworking employee consistently strives to exceed expectations.
His industrious nature makes him a valuable asset to any team.
His conscientious attention to detail ensures that projects are completed efficiently and accurately.
He remains persistent even in the face of challenges.`
		tranquilText = `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life.
Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation.
The calm environment offers the ideal place to meditate and connect with nature.
Even the most stressed individuals find themselves feeling relaxed and at ease.`
	)

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	analyzerName := en.AnalyzerName
	sourceConfig := map[string]interface{}{
		"collection": collectionName,
		"analyzer":   analyzerName,
	}

	// newSynonymField returns a text field mapping wired to the synonym source.
	newSynonymField := func() *mapping.FieldMapping {
		fm := mapping.NewTextFieldMapping()
		fm.Analyzer = analyzerName
		fm.SynonymSource = sourceName
		fm.IncludeInAll = false
		return fm
	}
	fieldA, fieldB, fieldC := newSynonymField(), newSynonymField(), newSynonymField()

	// Original mapping: all three fields present.
	before := mapping.NewIndexMapping()
	before.DefaultMapping.AddFieldMappingsAt("a", fieldA)
	before.DefaultMapping.AddFieldMappingsAt("b", fieldB)
	before.DefaultMapping.AddFieldMappingsAt("c", fieldC)
	if err := before.AddSynonymSource(sourceName, sourceConfig); err != nil {
		t.Fatal(err)
	}
	before.IndexDynamic = false
	before.StoreDynamic = false
	before.DocValuesDynamic = false

	idx, err := New(path, before)
	if err != nil {
		t.Fatal(err)
	}

	// Populate one batch: synonym definitions first, then the documents.
	batch := idx.NewBatch()
	synonymDefs := []struct {
		id  string
		def *SynonymDefinition
	}{
		{"001", &SynonymDefinition{Synonyms: []string{"hardworking", "industrious", "conscientious", "persistent", "focused", "devoted"}}},
		{"002", &SynonymDefinition{Synonyms: []string{"tranquil", "peaceful", "calm", "relaxed", "unruffled"}}},
	}
	for _, s := range synonymDefs {
		if err := batch.IndexSynonym(s.id, collectionName, s.def); err != nil {
			t.Fatal(err)
		}
	}
	documents := []struct {
		id   string
		body map[string]interface{}
	}{
		{"003", map[string]interface{}{"a": hardworkingText, "b": hardworkingText, "c": hardworkingText}},
		{"004", map[string]interface{}{"a": tranquilText, "b": tranquilText, "c": tranquilText}},
	}
	for _, d := range documents {
		if err := batch.Index(d.id, d.body); err != nil {
			t.Fatal(err)
		}
	}
	if err := idx.Batch(batch); err != nil {
		t.Fatal(err)
	}
	if err := idx.Close(); err != nil {
		t.Fatal(err)
	}

	// Updated mapping: "a" untouched, "b" no longer indexed, "c" dropped.
	after := mapping.NewIndexMapping()
	after.DefaultMapping.AddFieldMappingsAt("a", fieldA)
	fieldB.Index = false
	after.DefaultMapping.AddFieldMappingsAt("b", fieldB)
	if err := after.AddSynonymSource(sourceName, sourceConfig); err != nil {
		t.Fatal(err)
	}
	after.IndexDynamic = false
	after.StoreDynamic = false
	after.DocValuesDynamic = false

	encoded, err := json.Marshal(after)
	if err != nil {
		t.Fatal(err)
	}
	idx, err = OpenUsing(path, map[string]interface{}{
		"updated_mapping": string(encoded),
	})
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Each query targets a term that exists only in a synonym definition.
	checks := []struct {
		query   string
		want    int
		failFmt string
	}{
		{"a:devoted", 1, "Expected 1 hit, got %d\n"},
		{"b:devoted", 0, "Expected 0 hits, got %d\n"},
		{"c:unruffled", 0, "Expected 0 hits, got %d\n"},
	}
	for _, c := range checks {
		res, err := idx.Search(NewSearchRequest(NewQueryStringQuery(c.query)))
		if err != nil {
			t.Fatal(err)
		}
		if got := len(res.Hits); got != c.want {
			t.Errorf(c.failFmt, got)
		}
	}
}
// TestIndexUpdateMerge checks that a mapping update supplied when reopening an
// index is still honoured after the index has been force-merged into a single
// segment.
//
// Fields a, b, c and d are first indexed and stored as text. The index is then
// reopened with a mapping that leaves "a" untouched, stops indexing "b", stops
// storing "c" and drops "d" entirely, and each field is queried after the merge.
func TestIndexUpdateMerge(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// textProp builds a static sub-document mapping holding a single text field
	// with the requested index/store flags.
	textProp := func(indexed, stored bool) *mapping.DocumentMapping {
		return &mapping.DocumentMapping{
			Enabled:    true,
			Dynamic:    false,
			Properties: map[string]*mapping.DocumentMapping{},
			Fields: []*mapping.FieldMapping{
				{
					Type:  "text",
					Index: indexed,
					Store: stored,
				},
			},
			DefaultAnalyzer:      "standard",
			DefaultSynonymSource: "",
		}
	}
	// staticIndexMapping wraps the given properties in an index mapping that
	// has every dynamic behaviour switched off.
	staticIndexMapping := func(props map[string]*mapping.DocumentMapping) *mapping.IndexMappingImpl {
		im := mapping.NewIndexMapping()
		im.TypeMapping = map[string]*mapping.DocumentMapping{}
		im.DefaultMapping = &mapping.DocumentMapping{
			Enabled:              true,
			Dynamic:              false,
			Properties:           props,
			Fields:               []*mapping.FieldMapping{},
			DefaultAnalyzer:      "standard",
			DefaultSynonymSource: "",
		}
		im.IndexDynamic = false
		im.StoreDynamic = false
		im.DocValuesDynamic = false
		return im
	}

	indexMappingBefore := staticIndexMapping(map[string]*mapping.DocumentMapping{
		"a": textProp(true, true),
		"b": textProp(true, true),
		"c": textProp(true, true),
		"d": textProp(true, true),
	})
	index, err := New(tmpIndexPath, indexMappingBefore)
	if err != nil {
		t.Fatal(err)
	}

	numDocsPerBatch := 1000
	numBatches := 10

	var batch *Batch
	doc := make(map[string]interface{})
	const letters = "abcdefghijklmnopqrstuvwxyz"

	// randStr returns a random three-letter lowercase string.
	randStr := func() string {
		result := make([]byte, 3)
		for i := 0; i < 3; i++ {
			result[i] = letters[rand.Intn(len(letters))]
		}
		return string(result)
	}
	// Several batches are indexed separately before the later force merge.
	for i := 0; i < numBatches; i++ {
		batch = index.NewBatch()
		for j := 0; j < numDocsPerBatch; j++ {
			doc["a"] = randStr()
			doc["b"] = randStr()
			doc["c"] = randStr()
			doc["d"] = randStr()
			err = batch.Index(fmt.Sprintf("%d", i*numDocsPerBatch+j), doc)
			if err != nil {
				t.Fatal(err)
			}
		}
		err = index.Batch(batch)
		if err != nil {
			t.Fatal(err)
		}
	}
	err = index.Close()
	if err != nil {
		t.Fatal(err)
	}

	indexMappingAfter := staticIndexMapping(map[string]*mapping.DocumentMapping{
		"a": textProp(true, true),
		"b": textProp(false, true),
		"c": textProp(true, false),
	})
	mappingString, err := json.Marshal(indexMappingAfter)
	if err != nil {
		t.Fatal(err)
	}
	config := map[string]interface{}{
		"updated_mapping": string(mappingString),
	}

	index, err = OpenUsing(tmpIndexPath, config)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	impl, ok := index.(*indexImpl)
	if !ok {
		t.Fatalf("Typecasting index to indexImpl failed")
	}
	sindex, ok := impl.i.(*scorch.Scorch)
	if !ok {
		t.Fatalf("Typecasting index to scorch index failed")
	}

	err = sindex.ForceMerge(context.Background(), &mergeplan.SingleSegmentMergePlanOptions)
	if err != nil {
		t.Fatal(err)
	}

	// search runs a wildcard query against the field and asks for its stored
	// value to be returned with every hit.
	search := func(field string) *SearchResult {
		req := NewSearchRequest(NewQueryStringQuery(field + ":*"))
		req.Fields = append(req.Fields, field)
		res, err := index.Search(req)
		if err != nil {
			t.Fatal(err)
		}
		return res
	}

	// "a" is unchanged: still searchable and still stored.
	// NOTE(review): 10 is presumably the default page size of a search request.
	res1 := search("a")
	if len(res1.Hits) != 10 {
		t.Errorf("Expected 10 hits, got %d\n", len(res1.Hits))
	}
	// Only inspect the first hit when there is one, so a wrong hit count is
	// reported as a test failure instead of an index-out-of-range panic.
	if len(res1.Hits) > 0 && len(res1.Hits[0].Fields) != 1 {
		t.Errorf("Expected 1 field, got %d\n", len(res1.Hits[0].Fields))
	}

	// "b" is no longer indexed, so it must not match anything.
	res2 := search("b")
	if len(res2.Hits) != 0 {
		t.Errorf("Expected 0 hits, got %d\n", len(res2.Hits))
	}

	// "c" is still searchable but its stored value must be gone.
	res3 := search("c")
	if len(res3.Hits) != 10 {
		t.Errorf("Expected 10 hits, got %d\n", len(res3.Hits))
	}
	if len(res3.Hits) > 0 && len(res3.Hits[0].Fields) != 0 {
		t.Errorf("Expected 0 fields, got %d\n", len(res3.Hits[0].Fields))
	}

	// "d" was removed from the mapping altogether.
	res4 := search("d")
	if len(res4.Hits) != 0 {
		t.Errorf("Expected 0 hits, got %d\n", len(res4.Hits))
	}
}
// BenchmarkIndexUpdateText measures wildcard searches on a text field after
// the index has been reopened with an updated mapping that turns off storage
// for that field. Index construction and the reopen happen before the timer is
// reset, so only the searches are timed.
func BenchmarkIndexUpdateText(b *testing.B) {
	tmpIndexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, tmpIndexPath)

	// buildMapping returns a fully static index mapping with one indexed text
	// field "a" whose Store flag is the only thing that varies.
	buildMapping := func(store bool) *mapping.IndexMappingImpl {
		fieldA := &mapping.DocumentMapping{
			Enabled:    true,
			Dynamic:    false,
			Properties: map[string]*mapping.DocumentMapping{},
			Fields: []*mapping.FieldMapping{
				{
					Type:  "text",
					Index: true,
					Store: store,
				},
			},
			DefaultAnalyzer:      "standard",
			DefaultSynonymSource: "",
		}
		im := mapping.NewIndexMapping()
		im.TypeMapping = map[string]*mapping.DocumentMapping{}
		im.DefaultMapping = &mapping.DocumentMapping{
			Enabled:              true,
			Dynamic:              false,
			Properties:           map[string]*mapping.DocumentMapping{"a": fieldA},
			Fields:               []*mapping.FieldMapping{},
			DefaultAnalyzer:      "standard",
			DefaultSynonymSource: "",
		}
		im.IndexDynamic = false
		im.StoreDynamic = false
		im.DocValuesDynamic = false
		return im
	}

	indexMappingBefore := buildMapping(true)
	index, err := New(tmpIndexPath, indexMappingBefore)
	if err != nil {
		b.Fatal(err)
	}

	numDocsPerBatch := 1000
	numBatches := 5

	var batch *Batch
	doc := make(map[string]interface{})
	const letters = "abcdefghijklmnopqrstuvwxyz"

	// randStr produces a random three-letter lowercase string.
	randStr := func() string {
		var buf [3]byte
		for pos := range buf {
			buf[pos] = letters[rand.Intn(len(letters))]
		}
		return string(buf[:])
	}
	for batchNo := 0; batchNo < numBatches; batchNo++ {
		batch = index.NewBatch()
		for docNo := 0; docNo < numDocsPerBatch; docNo++ {
			doc["a"] = randStr()
			docID := fmt.Sprintf("%d", batchNo*numDocsPerBatch+docNo)
			if err = batch.Index(docID, doc); err != nil {
				b.Fatal(err)
			}
		}
		if err = index.Batch(batch); err != nil {
			b.Fatal(err)
		}
	}
	if err = index.Close(); err != nil {
		b.Fatal(err)
	}

	indexMappingAfter := buildMapping(false)
	mappingString, err := json.Marshal(indexMappingAfter)
	if err != nil {
		b.Fatal(err)
	}
	config := map[string]interface{}{
		"updated_mapping": string(mappingString),
	}

	index, err = OpenUsing(tmpIndexPath, config)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	b.ResetTimer()

	for iter := 0; iter < b.N; iter++ {
		req := NewSearchRequest(NewQueryStringQuery("a:*"))
		_, err = index.Search(req)
		if err != nil {
			b.Fatal(err)
		}
	}
}
// TestIndexUpdateNestedMapping checks that DeletedFields rejects any mapping
// update that changes whether a sub-document is nested, and accepts updates
// that leave every nested flag as it was.
func TestIndexUpdateNestedMapping(t *testing.T) {
	// newSubMapping returns a nested or a regular document mapping.
	newSubMapping := func(nested bool) *mapping.DocumentMapping {
		if nested {
			return mapping.NewNestedDocumentMapping()
		}
		return mapping.NewDocumentMapping()
	}

	// Helper: create a mapping with optional nested structure.
	// Each flag makes the sub-document of the same name nested when true.
	createCompanyMapping := func(nestedEmployees, nestedDepartments, nestedProjects, nestedLocations bool) *mapping.IndexMappingImpl {
		rv := mapping.NewIndexMapping()
		companyMapping := mapping.NewDocumentMapping()

		// Basic fields
		companyMapping.AddFieldMappingsAt("id", mapping.NewTextFieldMapping())
		companyMapping.AddFieldMappingsAt("name", mapping.NewTextFieldMapping())

		// Departments nested conditionally
		deptMapping := newSubMapping(nestedDepartments)
		deptMapping.AddFieldMappingsAt("name", mapping.NewTextFieldMapping())
		deptMapping.AddFieldMappingsAt("budget", mapping.NewNumericFieldMapping())

		// Employees nested conditionally
		empMapping := newSubMapping(nestedEmployees)
		empMapping.AddFieldMappingsAt("name", mapping.NewTextFieldMapping())
		empMapping.AddFieldMappingsAt("role", mapping.NewTextFieldMapping())
		deptMapping.AddSubDocumentMapping("employees", empMapping)

		// Projects nested conditionally
		projMapping := newSubMapping(nestedProjects)
		projMapping.AddFieldMappingsAt("title", mapping.NewTextFieldMapping())
		projMapping.AddFieldMappingsAt("status", mapping.NewTextFieldMapping())
		deptMapping.AddSubDocumentMapping("projects", projMapping)

		companyMapping.AddSubDocumentMapping("departments", deptMapping)

		// Locations nested conditionally
		locMapping := newSubMapping(nestedLocations)
		locMapping.AddFieldMappingsAt("address", mapping.NewTextFieldMapping())
		locMapping.AddFieldMappingsAt("city", mapping.NewTextFieldMapping())
		companyMapping.AddSubDocumentMapping("locations", locMapping)

		rv.DefaultMapping.AddSubDocumentMapping("company", companyMapping)
		return rv
	}

	tests := []struct {
		name      string
		original  *mapping.IndexMappingImpl
		updated   *mapping.IndexMappingImpl
		expectErr bool
	}{
		{
			name:      "No nested to all nested",
			original:  createCompanyMapping(false, false, false, false),
			updated:   createCompanyMapping(true, true, true, true),
			expectErr: true,
		},
		{
			name:      "No nested to mixed nested",
			original:  createCompanyMapping(false, false, false, false),
			updated:   createCompanyMapping(true, false, true, false),
			expectErr: true,
		},
		{
			name:      "No nested to mostly nested",
			original:  createCompanyMapping(false, false, false, false),
			updated:   createCompanyMapping(true, true, true, false),
			expectErr: true,
		},
		{
			name:      "Mixed nested to different mixed nested",
			original:  createCompanyMapping(false, true, false, true),
			updated:   createCompanyMapping(false, false, true, true),
			expectErr: true,
		},
		{
			name:      "All nested to no nested",
			original:  createCompanyMapping(true, true, true, true),
			updated:   createCompanyMapping(false, false, false, false),
			expectErr: true,
		},
		{
			name:      "Mixed nested to all nested",
			original:  createCompanyMapping(true, false, true, false),
			updated:   createCompanyMapping(true, true, true, true),
			expectErr: true,
		},
		{
			name:      "All nested to mixed nested",
			original:  createCompanyMapping(true, true, true, true),
			updated:   createCompanyMapping(true, false, true, false),
			expectErr: true,
		},
		{
			name:      "No nested to no nested",
			original:  createCompanyMapping(false, false, false, false),
			updated:   createCompanyMapping(false, false, false, false),
			expectErr: false,
		},
		{
			name:      "All nested to all nested",
			original:  createCompanyMapping(true, true, true, true),
			updated:   createCompanyMapping(true, true, true, true),
			expectErr: false,
		},
	}

	for _, test := range tests {
		_, err := DeletedFields(test.original, test.updated)
		if (err != nil) != test.expectErr {
			t.Errorf("Test '%s' unexpected error state: got %v, expectErr %t", test.name, err, test.expectErr)
		}
	}
}
// TestTemp compares two JSON index mappings that differ only in the "store"
// flag of the "name" field of the "inventory.hotel" type (true in the
// original, false in the update) and requires DeletedFields to accept the
// update without error. The computed field changes are logged through the
// test logger rather than printed to standard output.
func TestTemp(t *testing.T) {
	oriJSON := `
{
"default_analyzer": "standard",
"default_datetime_parser": "dateTimeOptional",
"default_field": "_all",
"default_mapping": {
"dynamic": true,
"enabled": false
},
"default_type": "_default",
"docvalues_dynamic": false,
"index_dynamic": true,
"scoring_model": "tf-idf",
"store_dynamic": false,
"type_field": "_type",
"types": {
"inventory.hotel": {
"dynamic": false,
"enabled": true,
"properties": {
"city": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "keyword",
"docvalues": true,
"index": true,
"name": "city",
"store": true,
"type": "text"
}
]
},
"country": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "keyword",
"docvalues": true,
"index": true,
"name": "country",
"store": true,
"type": "text"
}
]
},
"description": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "en",
"docvalues": true,
"index": true,
"name": "description",
"store": true,
"type": "text"
}
]
},
"free_breakfast": {
"enabled": true,
"dynamic": false,
"fields": [
{
"docvalues": true,
"index": true,
"name": "free_breakfast",
"store": true,
"type": "boolean"
}
]
},
"geo": {
"enabled": true,
"dynamic": false,
"fields": [
{
"index": true,
"name": "geo",
"store": true,
"type": "geopoint"
}
]
},
"id": {
"enabled": true,
"dynamic": false,
"fields": [
{
"docvalues": true,
"index": true,
"name": "id",
"store": true,
"type": "number"
}
]
},
"name": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "en",
"docvalues": true,
"index": true,
"name": "name",
"type": "text",
"store": true
}
]
},
"phone": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "keyword",
"docvalues": true,
"index": true,
"name": "phone",
"type": "text"
}
]
},
"title": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "en",
"docvalues": true,
"index": true,
"name": "title",
"store": true,
"type": "text"
}
]
},
"vacancy": {
"enabled": true,
"dynamic": false,
"fields": [
{
"docvalues": true,
"index": true,
"name": "vacancy",
"store": true,
"type": "boolean"
}
]
}
}
}
}
}`
	updJSON := `
{
"default_analyzer": "standard",
"default_datetime_parser": "dateTimeOptional",
"default_field": "_all",
"default_mapping": {
"dynamic": true,
"enabled": false
},
"default_type": "_default",
"docvalues_dynamic": false,
"index_dynamic": true,
"scoring_model": "tf-idf",
"store_dynamic": false,
"type_field": "_type",
"types": {
"inventory.hotel": {
"dynamic": false,
"enabled": true,
"properties": {
"city": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "keyword",
"docvalues": true,
"index": true,
"name": "city",
"store": true,
"type": "text"
}
]
},
"country": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "keyword",
"docvalues": true,
"index": true,
"name": "country",
"store": true,
"type": "text"
}
]
},
"description": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "en",
"docvalues": true,
"index": true,
"name": "description",
"store": true,
"type": "text"
}
]
},
"free_breakfast": {
"enabled": true,
"dynamic": false,
"fields": [
{
"docvalues": true,
"index": true,
"name": "free_breakfast",
"store": true,
"type": "boolean"
}
]
},
"geo": {
"enabled": true,
"dynamic": false,
"fields": [
{
"index": true,
"name": "geo",
"store": true,
"type": "geopoint"
}
]
},
"id": {
"enabled": true,
"dynamic": false,
"fields": [
{
"docvalues": true,
"index": true,
"name": "id",
"store": true,
"type": "number"
}
]
},
"name": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "en",
"docvalues": true,
"index": true,
"name": "name",
"type": "text",
"store": false
}
]
},
"phone": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "keyword",
"docvalues": true,
"index": true,
"name": "phone",
"type": "text"
}
]
},
"title": {
"enabled": true,
"dynamic": false,
"fields": [
{
"analyzer": "en",
"docvalues": true,
"index": true,
"name": "title",
"store": true,
"type": "text"
}
]
},
"vacancy": {
"enabled": true,
"dynamic": false,
"fields": [
{
"docvalues": true,
"index": true,
"name": "vacancy",
"store": true,
"type": "boolean"
}
]
}
}
}
}
}`

	var originalMapping *mapping.IndexMappingImpl
	err := json.Unmarshal([]byte(oriJSON), &originalMapping)
	if err != nil {
		t.Fatalf("Error unmarshalling mapping JSON: %v", err)
	}
	var updatedMapping *mapping.IndexMappingImpl
	err = json.Unmarshal([]byte(updJSON), &updatedMapping)
	if err != nil {
		t.Fatalf("Error unmarshalling mapping JSON: %v", err)
	}

	deletedFields, err := DeletedFields(originalMapping, updatedMapping)
	if err != nil {
		t.Fatalf("Error comparing mappings: %v", err)
	}
	// Log through the test so the output is attributed to this test and only
	// shown on failure or with -v.
	t.Logf("Deleted fields: %v", deletedFields)
}
================================================
FILE: mapping/analysis.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
// customAnalysis holds the user-defined analysis components of an index
// mapping. Every member maps a component name to that component's
// configuration; registerAll hands each (name, config) pair to the matching
// Define* method of the index mapping's cache.
type customAnalysis struct {
// CharFilters are registered through DefineCharFilter.
CharFilters map[string]map[string]interface{} `json:"char_filters,omitempty"`
// Tokenizers are registered through DefineTokenizer.
Tokenizers map[string]map[string]interface{} `json:"tokenizers,omitempty"`
// TokenMaps are registered through DefineTokenMap.
TokenMaps map[string]map[string]interface{} `json:"token_maps,omitempty"`
// TokenFilters are registered through DefineTokenFilter.
TokenFilters map[string]map[string]interface{} `json:"token_filters,omitempty"`
// Analyzers are registered through DefineAnalyzer.
Analyzers map[string]map[string]interface{} `json:"analyzers,omitempty"`
// DateTimeParsers are registered through DefineDateTimeParser.
DateTimeParsers map[string]map[string]interface{} `json:"date_time_parsers,omitempty"`
// SynonymSources are registered through DefineSynonymSource.
SynonymSources map[string]map[string]interface{} `json:"synonym_sources,omitempty"`
}
// registerAll defines every custom analysis component on the cache of the
// given index mapping. Components are registered by kind in this order: char
// filters, tokenizers, token maps, token filters, analyzers, date time parsers
// and synonym sources. The first error encountered is returned.
//
// Tokenizers are registered in repeated passes: names that fail are retried
// for as long as the previous pass registered at least one tokenizer
// (presumably because a tokenizer's config may refer to another custom
// tokenizer). If a pass makes no progress, the first error of that pass is
// returned.
func (c *customAnalysis) registerAll(i *IndexMappingImpl) error {
	for filterName, filterConfig := range c.CharFilters {
		if _, err := i.cache.DefineCharFilter(filterName, filterConfig); err != nil {
			return err
		}
	}

	if len(c.Tokenizers) > 0 {
		// names of the tokenizers that still have to be registered
		pending := map[string]struct{}{}
		for tokenizerName := range c.Tokenizers {
			pending[tokenizerName] = struct{}{}
		}

		var failures []error
		// keep going while the last pass registered something
		for progressed := true; progressed && len(pending) > 0; {
			progressed = false
			failures = []error{}
			for tokenizerName := range pending {
				_, err := i.cache.DefineTokenizer(tokenizerName, c.Tokenizers[tokenizerName])
				if err != nil {
					failures = append(failures, err)
					continue
				}
				delete(pending, tokenizerName)
				progressed = true
			}
		}

		if len(failures) > 0 {
			return failures[0]
		}
	}

	for mapName, mapConfig := range c.TokenMaps {
		if _, err := i.cache.DefineTokenMap(mapName, mapConfig); err != nil {
			return err
		}
	}
	for filterName, filterConfig := range c.TokenFilters {
		if _, err := i.cache.DefineTokenFilter(filterName, filterConfig); err != nil {
			return err
		}
	}
	for analyzerName, analyzerConfig := range c.Analyzers {
		if _, err := i.cache.DefineAnalyzer(analyzerName, analyzerConfig); err != nil {
			return err
		}
	}
	for parserName, parserConfig := range c.DateTimeParsers {
		if _, err := i.cache.DefineDateTimeParser(parserName, parserConfig); err != nil {
			return err
		}
	}
	for sourceName, sourceConfig := range c.SynonymSources {
		if _, err := i.cache.DefineSynonymSource(sourceName, sourceConfig); err != nil {
			return err
		}
	}
	return nil
}
// newCustomAnalysis returns a customAnalysis in which every component map is
// allocated and empty, so entries can be added without nil checks.
func newCustomAnalysis() *customAnalysis {
	type configsByName = map[string]map[string]interface{}

	return &customAnalysis{
		CharFilters:     configsByName{},
		Tokenizers:      configsByName{},
		TokenMaps:       configsByName{},
		TokenFilters:    configsByName{},
		Analyzers:       configsByName{},
		DateTimeParsers: configsByName{},
		SynonymSources:  configsByName{},
	}
}
================================================
FILE: mapping/document.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding"
"encoding/json"
"fmt"
"net"
"reflect"
"time"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/util"
)
// A DocumentMapping describes how a type of document
// should be indexed.
// As documents can be hierarchical, named sub-sections
// of documents are mapped using the same structure in
// the Properties field.
// Each value inside a document can be indexed 0 or more
// ways. These index entries are called fields and
// are stored in the Fields field.
// Entire sections of a document can be ignored or
// excluded by setting Enabled to false.
// If not explicitly mapped, default mapping operations
// are used. To disable this automatic handling, set
// Dynamic to false.
type DocumentMapping struct {
// Enabled defaults to true when the key is omitted from JSON.
Enabled bool `json:"enabled"`
// Dynamic defaults to true when the key is omitted from JSON.
Dynamic bool `json:"dynamic"`
// Properties holds the mappings of named sub-sections, keyed by property name.
Properties map[string]*DocumentMapping `json:"properties,omitempty"`
// Fields lists the index entries produced for the value at this level.
Fields []*FieldMapping `json:"fields,omitempty"`
// Nested makes walkDocument turn each struct or map element of an array
// at this path into its own nested document.
Nested bool `json:"nested,omitempty"`
// DefaultAnalyzer, when non-empty, overrides the default analyzer inherited
// from ancestor mappings for this section (see defaultAnalyzerName).
DefaultAnalyzer string `json:"default_analyzer,omitempty"`
// DefaultSynonymSource, when non-empty, overrides the default synonym source
// inherited from ancestor mappings (see defaultSynonymSource).
DefaultSynonymSource string `json:"default_synonym_source,omitempty"`
// StructTagKey overrides "json" when looking for field names in struct tags
StructTagKey string `json:"struct_tag_key,omitempty"`
}
// Validate checks that every analyzer, synonym source and date time parser
// referenced by this mapping, its fields and (recursively) its properties is
// known to the cache, and runs validateFieldMapping on each field. path is the
// list of property names leading to this mapping. The first error found is
// returned; nil means the mapping is valid.
func (dm *DocumentMapping) Validate(cache *registry.Cache,
	path []string, fieldAliasCtx map[string]*FieldMapping,
) error {
	if analyzer := dm.DefaultAnalyzer; analyzer != "" {
		if _, err := cache.AnalyzerNamed(analyzer); err != nil {
			return err
		}
	}
	if source := dm.DefaultSynonymSource; source != "" {
		if _, err := cache.SynonymSourceNamed(source); err != nil {
			return err
		}
	}

	// sub-sections are validated before the fields of this level
	for childName, child := range dm.Properties {
		if err := child.Validate(cache, append(path, childName), fieldAliasCtx); err != nil {
			return err
		}
	}

	for _, fm := range dm.Fields {
		if fm.Analyzer != "" {
			if _, err := cache.AnalyzerNamed(fm.Analyzer); err != nil {
				return err
			}
		}
		if fm.DateFormat != "" {
			if _, err := cache.DateTimeParserNamed(fm.DateFormat); err != nil {
				return err
			}
		}
		if fm.SynonymSource != "" {
			if _, err := cache.SynonymSourceNamed(fm.SynonymSource); err != nil {
				return err
			}
		}
		if err := validateFieldMapping(fm, path, fieldAliasCtx); err != nil {
			return err
		}
	}
	return nil
}
// validateFieldType returns nil when the field's Type is one of the supported
// type names and an error naming the field and the offending type otherwise.
// The comparison is case-sensitive.
func validateFieldType(field *FieldMapping) error {
	supported := [...]string{
		"text", "datetime", "number", "boolean", "geopoint", "geoshape", "IP",
	}
	for _, known := range supported {
		if field.Type == known {
			return nil
		}
	}
	return fmt.Errorf("field: '%s', unknown field type: '%s'",
		field.Name, field.Type)
}
// analyzerNameForPath looks up the field described by path and returns the
// analyzer configured on it, or the empty string when no such field exists.
func (dm *DocumentMapping) analyzerNameForPath(path string) string {
	if fm := dm.fieldDescribedByPath(path); fm != nil {
		return fm.Analyzer
	}
	return ""
}
// synonymSourceForPath looks up the field described by path and returns the
// synonym source configured on it, or the empty string when no such field
// exists.
func (dm *DocumentMapping) synonymSourceForPath(path string) string {
	if fm := dm.fieldDescribedByPath(path); fm != nil {
		return fm.SynonymSource
	}
	return ""
}
// fieldDescribedByPath returns the field mapping that path refers to, or nil
// when there is none. Resolution proceeds in three steps:
//
//  1. if the path has several elements and the first one names a property,
//     the remainder of the path is resolved inside that property;
//  2. otherwise the property whose name equals the whole path is searched for
//     a field that is unnamed or named like the path;
//  3. finally the fields of all other properties are searched for one whose
//     name equals the path.
func (dm *DocumentMapping) fieldDescribedByPath(path string) *FieldMapping {
	elements := decodePath(path)
	if len(elements) > 1 {
		// the leading element must name a property at this level
		if child, found := dm.Properties[elements[0]]; found {
			return child.fieldDescribedByPath(encodePath(elements[1:]))
		}
	}

	// single element, or no property matched the leading element:
	// try the property named exactly like the full path first
	if child, found := dm.Properties[path]; found {
		for _, fm := range child.Fields {
			if fm.Name == "" || fm.Name == path {
				return fm
			}
		}
	}

	// then look for a field on any other property that overrides its name
	for childName, child := range dm.Properties {
		if childName == path {
			continue
		}
		for _, fm := range child.Fields {
			if fm.Name == path {
				return fm
			}
		}
	}

	return nil
}
// documentMappingForPathElements returns the EXACT and closest matches for a sub
// document or for an explicitly mapped field; the closest most specific
// document mapping could be one that matches part of the provided path.
//
// The path is walked through Properties one element at a time. When an element
// names no property, the walk stops: if that element is the last one and it
// equals the name of a field declared on the mapping reached so far, that
// mapping is both the exact and the closest match; otherwise there is no exact
// match (nil) and the mapping reached so far is the closest one. An empty path
// is treated as the single element "".
func (dm *DocumentMapping) documentMappingForPathElements(pathElements []string) (
	*DocumentMapping, *DocumentMapping,
) {
	elements := pathElements
	if len(elements) == 0 {
		elements = []string{""}
	}

	current := dm
	for i, pathElement := range elements {
		if subDocMapping, exists := current.Properties[pathElement]; exists {
			current = subDocMapping
			continue
		}

		// no subDocMapping matches this pathElement
		// only if this is the last element check for field name
		if i == len(elements)-1 {
			for _, field := range current.Fields {
				if field.Name == pathElement {
					// Explicitly mapped field: return directly so the match is
					// not lost by merely leaving the inner loop.
					return current, current
				}
			}
		}

		return nil, current
	}
	return current, current
}
// documentMappingForPath decodes the encoded path into its elements and
// delegates to documentMappingForPathElements, returning that method's exact
// and closest document mappings unchanged.
func (dm *DocumentMapping) documentMappingForPath(path string) (
	*DocumentMapping, *DocumentMapping,
) {
	return dm.documentMappingForPathElements(decodePath(path))
}
// NewDocumentMapping returns a document mapping with the default settings:
// enabled and dynamic.
func NewDocumentMapping() *DocumentMapping {
	dm := new(DocumentMapping)
	dm.Enabled = true
	dm.Dynamic = true
	return dm
}
// NewNestedDocumentMapping returns an enabled, dynamic document mapping that
// treats sub-documents as nested objects.
func NewNestedDocumentMapping() *DocumentMapping {
	dm := new(DocumentMapping)
	dm.Enabled = true
	dm.Dynamic = true
	dm.Nested = true
	return dm
}
// NewDocumentStaticMapping returns an enabled document mapping with dynamic
// handling turned off, so parts of a document without an explicit mapping are
// not indexed automatically.
func NewDocumentStaticMapping() *DocumentMapping {
	dm := new(DocumentMapping)
	dm.Enabled = true
	return dm
}
// NewNestedDocumentStaticMapping returns an enabled document mapping that
// treats sub-documents as nested objects and has dynamic handling turned off,
// so parts of the nested document without an explicit mapping are not indexed
// automatically.
func NewNestedDocumentStaticMapping() *DocumentMapping {
	dm := new(DocumentMapping)
	dm.Enabled = true
	dm.Nested = true
	return dm
}
// NewDocumentDisabledMapping returns a zero-valued document mapping, i.e. one
// that is not enabled and therefore performs no indexing.
func NewDocumentDisabledMapping() *DocumentMapping {
	return new(DocumentMapping)
}
// AddSubDocumentMapping registers sdm as the mapping of the sub-section named
// property, replacing any mapping previously stored under that name. The
// Properties map is allocated on first use.
func (dm *DocumentMapping) AddSubDocumentMapping(property string, sdm *DocumentMapping) {
	props := dm.Properties
	if props == nil {
		props = make(map[string]*DocumentMapping)
		dm.Properties = props
	}
	props[property] = sdm
}
// AddFieldMappingsAt appends the given FieldMappings to the sub-document named
// property, creating that sub-document with NewDocumentMapping when it does
// not exist yet.
// It is a shorthand for the common sequence:
//
//	subMapping := NewDocumentMapping()
//	subMapping.AddFieldMapping(fieldMapping)
//	parentMapping.AddSubDocumentMapping(property, subMapping)
func (dm *DocumentMapping) AddFieldMappingsAt(property string, fms ...*FieldMapping) {
	if dm.Properties == nil {
		dm.Properties = make(map[string]*DocumentMapping)
	}
	target, found := dm.Properties[property]
	if !found {
		target = NewDocumentMapping()
	}
	for idx := range fms {
		target.AddFieldMapping(fms[idx])
	}
	dm.Properties[property] = target
}
// AddFieldMapping appends fm to the fields declared for this section of the
// document.
func (dm *DocumentMapping) AddFieldMapping(fm *FieldMapping) {
	existing := dm.Fields
	if existing == nil {
		existing = make([]*FieldMapping, 0)
	}
	dm.Fields = append(existing, fm)
}
// UnmarshalJSON decodes a document mapping from JSON. Enabled and Dynamic
// default to true when their keys are absent. Unrecognised keys are collected
// and, when MappingJSONStrict is set, reported as an error; otherwise they are
// ignored.
func (dm *DocumentMapping) UnmarshalJSON(data []byte) error {
	var raw map[string]json.RawMessage
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// defaults for keys that may have been omitted
	dm.Enabled = true
	dm.Dynamic = true

	var invalidKeys []string
	for key, value := range raw {
		// pick the destination for this key, then decode once below
		var dest interface{}
		switch key {
		case "enabled":
			dest = &dm.Enabled
		case "dynamic":
			dest = &dm.Dynamic
		case "nested":
			dest = &dm.Nested
		case "default_analyzer":
			dest = &dm.DefaultAnalyzer
		case "default_synonym_source":
			dest = &dm.DefaultSynonymSource
		case "properties":
			dest = &dm.Properties
		case "fields":
			dest = &dm.Fields
		case "struct_tag_key":
			dest = &dm.StructTagKey
		default:
			invalidKeys = append(invalidKeys, key)
			continue
		}
		if err := util.UnmarshalJSON(value, dest); err != nil {
			return err
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("document mapping contains invalid keys: %v", invalidKeys)
	}

	return nil
}
// defaultAnalyzerName follows path down through Properties, starting at dm,
// and returns the DefaultAnalyzer of the deepest mapping on the way that sets
// one. The walk stops at the first path element that names no property. The
// empty string is returned when no mapping visited declares a default
// analyzer.
func (dm *DocumentMapping) defaultAnalyzerName(path []string) string {
	name := dm.DefaultAnalyzer
	node := dm
	for _, element := range path {
		child, found := node.Properties[element]
		if !found {
			return name
		}
		node = child
		if override := node.DefaultAnalyzer; override != "" {
			name = override
		}
	}
	return name
}
// defaultSynonymSource follows path down through Properties, starting at dm,
// and returns the DefaultSynonymSource of the deepest mapping on the way that
// sets one. The walk stops at the first path element that names no property.
// The empty string is returned when no mapping visited declares a default
// synonym source.
func (dm *DocumentMapping) defaultSynonymSource(path []string) string {
	source := dm.DefaultSynonymSource
	node := dm
	for _, element := range path {
		child, found := node.Properties[element]
		if !found {
			return source
		}
		node = child
		if override := node.DefaultSynonymSource; override != "" {
			source = override
		}
	}
	return source
}
// baseType strips every level of pointer indirection from the dynamic type of
// v and returns what remains. It returns nil when v is a nil interface value.
func baseType(v interface{}) reflect.Type {
	if v == nil {
		return nil
	}
	typ := reflect.TypeOf(v)
	for {
		if typ.Kind() != reflect.Pointer {
			return typ
		}
		typ = typ.Elem()
	}
}
// walkDocument reflects over data and hands every value it contains to
// processProperty, extending path and indexes as it descends.
//
//   - Maps with string keys: each entry is processed under path + key. Maps
//     with any other key kind are ignored.
//   - Structs: each field whose value can be obtained through reflection is
//     processed under its Go name, or under the name given by the struct tag
//     (dm.StructTagKey, "json" when unset). A tag name of "-" skips the
//     field. An anonymous struct field adds no path element unless its tag
//     supplies a name.
//   - Slices and arrays: each element is processed under the same path with
//     its position appended to indexes. When the mapping found at path is
//     marked Nested, elements whose base type is a struct or a map are
//     indexed into a separate nested document that is attached to
//     context.doc.
//   - Pointers are dereferenced; nil pointers are ignored.
//   - Strings, booleans and numbers are passed on to processProperty, numbers
//     converted to float64.
//
// Values of any other kind, and invalid values such as a nil interface, are
// ignored.
func (dm *DocumentMapping) walkDocument(data interface{}, path []string, indexes []uint64, context *walkContext) {
	// allow default "json" tag to be overridden
	structTagKey := dm.StructTagKey
	if structTagKey == "" {
		structTagKey = "json"
	}

	val := reflect.ValueOf(data)
	if !val.IsValid() {
		return
	}

	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if typ.Key().Kind() == reflect.String {
			for _, key := range val.MapKeys() {
				fieldName := key.String()
				fieldVal := val.MapIndex(key).Interface()
				dm.processProperty(fieldVal, append(path, fieldName), indexes, context)
			}
		}
	case reflect.Struct:
		for i := 0; i < val.NumField(); i++ {
			field := typ.Field(i)
			fieldName := field.Name
			// anonymous fields of type struct can elide the type name
			if field.Anonymous && field.Type.Kind() == reflect.Struct {
				fieldName = ""
			}

			// if the field has a name under the specified tag, prefer that
			tag := field.Tag.Get(structTagKey)
			tagFieldName := parseTagName(tag)
			if tagFieldName == "-" {
				continue
			}
			// allow tag to set field name to empty, only if anonymous
			if field.Tag != "" && (tagFieldName != "" || field.Anonymous) {
				fieldName = tagFieldName
			}

			if val.Field(i).CanInterface() {
				fieldVal := val.Field(i).Interface()
				newpath := path
				if fieldName != "" {
					newpath = append(path, fieldName)
				}
				dm.processProperty(fieldVal, newpath, indexes, context)
			}
		}
	case reflect.Slice, reflect.Array:
		subDocMapping, _ := dm.documentMappingForPathElements(path)
		allowNested := subDocMapping != nil && subDocMapping.Nested
		for i := 0; i < val.Len(); i++ {
			// for each array element, check if it can be represented as an interface
			idxVal := val.Index(i)
			// skip invalid values
			if !idxVal.CanInterface() {
				continue
			}
			// get the actual value in interface form
			actual := idxVal.Interface()

			// Give every element its own positions slice. A plain
			// append(indexes, i) writes into the caller's backing array
			// whenever it has spare capacity, so consecutive elements would
			// share storage. The slice is handed to the document field
			// constructors further down, and if they keep it, the positions
			// recorded for earlier elements would be overwritten by later
			// ones. The full slice expression caps the capacity, which forces
			// append to copy.
			elemIndexes := append(indexes[:len(indexes):len(indexes)], uint64(i))

			// if nested mapping, only create nested document for object elements
			if allowNested && actual != nil {
				// check the kind of the actual value, is it an object (struct or map)?
				elemType := baseType(actual)
				if elemType == nil {
					continue
				}
				kind := elemType.Kind()
				// only create nested docs for real JSON objects
				if kind == reflect.Struct || kind == reflect.Map {
					// create a nested document only for object elements
					nestedDocument := document.NewDocument(
						fmt.Sprintf("%s_$%s_$%d", context.doc.ID(), encodePath(path), i))
					nestedContext := context.im.newWalkContext(nestedDocument, dm)
					dm.processProperty(actual, path, elemIndexes, nestedContext)
					context.doc.AddNestedDocument(nestedDocument)
					continue
				}
			}
			// non-nested mapping, or non-object element in nested mapping
			// process the element normally
			dm.processProperty(actual, path, elemIndexes, context)
		}
	case reflect.Ptr:
		ptrElem := val.Elem()
		if ptrElem.IsValid() && ptrElem.CanInterface() {
			dm.processProperty(ptrElem.Interface(), path, indexes, context)
		}
	case reflect.String:
		dm.processProperty(val.String(), path, indexes, context)
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		dm.processProperty(float64(val.Int()), path, indexes, context)
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
		dm.processProperty(float64(val.Uint()), path, indexes, context)
	case reflect.Float32, reflect.Float64:
		dm.processProperty(float64(val.Float()), path, indexes, context)
	case reflect.Bool:
		dm.processProperty(val.Bool(), path, indexes, context)
	}
}
// processProperty indexes a single property value located at path.
//
// It first resolves the mapping for path. When an explicit sub-mapping exists
// and is disabled, nothing is indexed. Otherwise the reflect.Kind of the value
// selects the handling:
//
//   - strings, floats, booleans and time.Time values are indexed through the
//     field mappings of the explicit sub-mapping when there is one, or
//     through a dynamically created field mapping when the closest mapping
//     has Dynamic set;
//   - integers are converted to float64 and processed again;
//   - structs, maps, slices and pointers are offered to the geo, vector, IP
//     and text-marshaling field mappings where applicable and are otherwise
//     walked with walkDocument.
//
// indexes is passed through to the field mappings and to walkDocument, and
// context carries the document being built.
func (dm *DocumentMapping) processProperty(property interface{}, path []string, indexes []uint64, context *walkContext) {
// look to see if there is a mapping for this field
subDocMapping, closestDocMapping := dm.documentMappingForPathElements(path)
// check to see if we even need to do further processing
if subDocMapping != nil && !subDocMapping.Enabled {
return
}
propertyValue := reflect.ValueOf(property)
if !propertyValue.IsValid() {
// cannot do anything with the zero value
return
}
pathString := encodePath(path)
propertyType := propertyValue.Type()
switch propertyType.Kind() {
case reflect.String:
propertyValueString := propertyValue.String()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
// geo and base64 vector mappings receive the raw property; every
// other mapping type is handled by processString
switch fieldMapping.Type {
case "geoshape":
fieldMapping.processGeoShape(property, pathString, path, indexes, context)
case "geopoint":
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
case "vector_base64":
fieldMapping.processVectorBase64(property, pathString, path, indexes, context)
default:
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
}
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
// first see if it can be parsed by the default date parser
dateTimeParser := context.im.DateTimeParserNamed(context.im.DefaultDateTimeParser)
// NOTE(review): when no parser is returned the string is not indexed
// at all, not even as text
if dateTimeParser != nil {
parsedDateTime, layout, err := dateTimeParser.ParseDateTime(propertyValueString)
if err != nil {
// index as text
fieldMapping := newTextFieldMappingDynamic(context.im)
fieldMapping.processString(propertyValueString, pathString, path, indexes, context)
} else {
// index as datetime
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(parsedDateTime, layout, pathString, path, indexes, context)
}
}
}
case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
// all numbers are indexed as float64; re-enter with the converted value
dm.processProperty(float64(propertyValue.Int()), path, indexes, context)
return
case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
dm.processProperty(float64(propertyValue.Uint()), path, indexes, context)
return
case reflect.Float64, reflect.Float32:
propertyValFloat := propertyValue.Float()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newNumericFieldMappingDynamic(context.im)
fieldMapping.processFloat64(propertyValFloat, pathString, path, indexes, context)
}
case reflect.Bool:
propertyValBool := propertyValue.Bool()
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
// automatic indexing behavior
fieldMapping := newBooleanFieldMappingDynamic(context.im)
fieldMapping.processBoolean(propertyValBool, pathString, path, indexes, context)
}
case reflect.Struct:
switch property := property.(type) {
case time.Time:
// don't descend into the time struct
if subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processTime(property, time.RFC3339, pathString, path, indexes, context)
}
} else if closestDocMapping.Dynamic {
fieldMapping := newDateTimeFieldMappingDynamic(context.im)
fieldMapping.processTime(property, time.RFC3339, pathString, path, indexes, context)
}
case encoding.TextMarshaler:
// feed the marshaled text to the explicit "text" field mappings; the
// struct is walked afterwards regardless of the outcome
txt, err := property.MarshalText()
if err == nil && subDocMapping != nil {
// index by explicit mapping
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type == "text" {
fieldMapping.processString(string(txt), pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
default:
// any other struct: give geo field mappings a chance to interpret it,
// then descend into its fields
if subDocMapping != nil {
for _, fieldMapping := range subDocMapping.Fields {
switch fieldMapping.Type {
case "geopoint":
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
case "geoshape":
fieldMapping.processGeoShape(property, pathString, path, indexes, context)
}
}
}
dm.walkDocument(property, path, indexes, context)
}
case reflect.Map, reflect.Slice:
// walkDocument records whether the container must also be descended
// into; a mapping made up only of "vector" fields leaves it false
walkDocument := false
if subDocMapping != nil && len(subDocMapping.Fields) != 0 {
for _, fieldMapping := range subDocMapping.Fields {
switch fieldMapping.Type {
case "vector":
fieldMapping.processVector(property, pathString, path,
indexes, context)
case "geopoint":
fieldMapping.processGeoPoint(property, pathString, path, indexes, context)
walkDocument = true
case "IP":
// only a property that is itself a net.IP is indexed here
ip, ok := property.(net.IP)
if ok {
fieldMapping.processIP(ip, pathString, path, indexes, context)
}
walkDocument = true
case "geoshape":
fieldMapping.processGeoShape(property, pathString, path, indexes, context)
walkDocument = true
default:
walkDocument = true
}
}
} else {
walkDocument = true
}
if walkDocument {
dm.walkDocument(property, path, indexes, context)
}
case reflect.Ptr:
// nil pointers carry nothing to index
if !propertyValue.IsNil() {
switch property := property.(type) {
case encoding.TextMarshaler:
// ONLY process TextMarshaler if there is an explicit mapping
// AND all of the fields are of type text
// OTHERWISE process field without TextMarshaler
if subDocMapping != nil {
allFieldsText := true
for _, fieldMapping := range subDocMapping.Fields {
if fieldMapping.Type != "text" {
allFieldsText = false
break
}
}
txt, err := property.MarshalText()
if err == nil && allFieldsText {
txtStr := string(txt)
for _, fieldMapping := range subDocMapping.Fields {
fieldMapping.processString(txtStr, pathString, path, indexes, context)
}
return
}
}
dm.walkDocument(property, path, indexes, context)
default:
dm.walkDocument(property, path, indexes, context)
}
}
default:
// every remaining kind is left to walkDocument
dm.walkDocument(property, path, indexes, context)
}
}
================================================
FILE: mapping/examples_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import "fmt"
// Examples for Mapping related functions
func ExampleDocumentMapping_AddSubDocumentMapping() {
	// register a child mapping under the "Property" key of a parent mapping;
	// this is how the mapping of a nested object is described
	parent := NewDocumentMapping()
	parent.AddSubDocumentMapping("Property", NewDocumentMapping())

	fmt.Println(len(parent.Properties))
	// Output:
	// 1
}
func ExampleDocumentMapping_AddFieldMapping() {
	// a field mapping is attached to the document mapping of a property, so
	// that document mapping has to be created first
	parent := NewDocumentMapping()
	child := NewDocumentMapping()
	parent.AddSubDocumentMapping("Property", child)

	textField := NewTextFieldMapping()
	textField.Analyzer = "en"
	child.AddFieldMapping(textField)

	fmt.Println(len(parent.Properties["Property"].Fields))
	// Output:
	// 1
}
func ExampleDocumentMapping_AddFieldMappingsAt() {
	// attach a field mapping to a named property directly from the parent
	// mapping
	parent := NewDocumentMapping()
	parent.AddSubDocumentMapping("NestedProperty", NewDocumentMapping())

	textField := NewTextFieldMapping()
	textField.Analyzer = "en"
	parent.AddFieldMappingsAt("NestedProperty", textField)

	fmt.Println(len(parent.Properties["NestedProperty"].Fields))
	// Output:
	// 1
}
================================================
FILE: mapping/field.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding/json"
"fmt"
"net"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/geojson"
)
// Package-level defaults for dynamic fields (those not explicitly mapped).
// NewIndexMapping and IndexMappingImpl.UnmarshalJSON copy these values into
// the IndexDynamic, StoreDynamic and DocValuesDynamic settings of the mapping
// they initialize.
var (
IndexDynamic = true
StoreDynamic = true
DocValuesDynamic = true // TODO revisit default?
)
// A FieldMapping describes how a specific item
// should be put into the index.
type FieldMapping struct {
// Name, when set, replaces the last path element in the name of the
// indexed field; when empty the encoded path is used as the field name.
Name string `json:"name,omitempty"`
// Type selects how the value is indexed. Values handled in this package
// include "text", "number", "datetime", "boolean", "geopoint", "geoshape",
// "IP", "vector" and "vector_base64".
Type string `json:"type,omitempty"`
// Analyzer specifies the name of the analyzer to use for this field. If
// Analyzer is empty, traverse the DocumentMapping tree toward the root and
// pick the first non-empty DefaultAnalyzer found. If there is none, use
// the IndexMapping.DefaultAnalyzer.
Analyzer string `json:"analyzer,omitempty"`
// Store indicates whether to store field values in the index. Stored
// values can be retrieved from search results using SearchRequest.Fields.
Store bool `json:"store,omitempty"`
// Index, if true, sets the index.IndexField indexing option for this field.
Index bool `json:"index,omitempty"`
// IncludeTermVectors, if true, makes terms occurrences to be recorded for
// this field. It includes the term position within the terms sequence and
// the term offsets in the source document field. Term vectors are required
// to perform phrase queries or terms highlighting in source documents.
IncludeTermVectors bool `json:"include_term_vectors,omitempty"`
// IncludeInAll, if false, makes the field name be recorded in the walk
// context's list of fields excluded from the composite "all" field.
IncludeInAll bool `json:"include_in_all,omitempty"`
// DateFormat names the date time parser used for "datetime" fields. When
// empty the index mapping's DefaultDateTimeParser is used.
DateFormat string `json:"date_format,omitempty"`
// DocValues, if true makes the index uninverting possible for this field
// It is useful for faceting and sorting queries.
DocValues bool `json:"docvalues,omitempty"`
// SkipFreqNorm, if true, avoids the indexing of frequency and norm values
// of the tokens for this field. This option would be useful for saving
// the processing of freq/norm details when the default score based relevancy
// isn't needed.
SkipFreqNorm bool `json:"skip_freq_norm,omitempty"`
// Dimensionality of the vector
Dims int `json:"dims,omitempty"`
// Similarity is the similarity algorithm used for scoring
// field's content while performing search on it.
// See: index.SimilarityModels
Similarity string `json:"similarity,omitempty"`
// Applicable to vector fields only - optimization string
VectorIndexOptimizedFor string `json:"vector_index_optimized_for,omitempty"`
// SynonymSource is presumably the name of the synonym source applied to
// this field - TODO confirm against the code that consumes it.
SynonymSource string `json:"synonym_source,omitempty"`
}
// NewTextFieldMapping returns a field mapping of type "text" that is stored,
// indexed, records term vectors, is included in the "all" field and has doc
// values enabled.
func NewTextFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "text"
	fm.Store = true
	fm.Index = true
	fm.IncludeTermVectors = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newTextFieldMappingDynamic returns the default text field mapping with its
// Store, Index and DocValues settings taken from the dynamic settings of im.
func newTextFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewTextFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewKeywordFieldMapping returns a field mapping of type "text" that uses the
// "keyword" analyzer. It is stored, indexed, records term vectors, is
// included in the "all" field and has doc values enabled.
func NewKeywordFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "text"
	fm.Analyzer = keyword.Name
	fm.Store = true
	fm.Index = true
	fm.IncludeTermVectors = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// NewNumericFieldMapping returns a field mapping of type "number" that is
// stored, indexed, included in the "all" field and has doc values enabled.
func NewNumericFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "number"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newNumericFieldMappingDynamic returns the default numeric field mapping
// with its Store, Index and DocValues settings taken from the dynamic
// settings of im.
func newNumericFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewNumericFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewDateTimeFieldMapping returns a field mapping of type "datetime" that is
// stored, indexed, included in the "all" field and has doc values enabled.
func NewDateTimeFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "datetime"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newDateTimeFieldMappingDynamic returns the default datetime field mapping
// with its Store, Index and DocValues settings taken from the dynamic
// settings of im.
func newDateTimeFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewDateTimeFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewBooleanFieldMapping returns a field mapping of type "boolean" that is
// stored, indexed, included in the "all" field and has doc values enabled.
func NewBooleanFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "boolean"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// newBooleanFieldMappingDynamic returns the default boolean field mapping
// with its Store, Index and DocValues settings taken from the dynamic
// settings of im.
func newBooleanFieldMappingDynamic(im *IndexMappingImpl) *FieldMapping {
	fm := NewBooleanFieldMapping()
	fm.Store, fm.Index, fm.DocValues = im.StoreDynamic, im.IndexDynamic, im.DocValuesDynamic
	return fm
}
// NewGeoPointFieldMapping returns a field mapping of type "geopoint" that is
// stored, indexed, included in the "all" field and has doc values enabled.
func NewGeoPointFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "geopoint"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// NewGeoShapeFieldMapping returns a field mapping of type "geoshape" that is
// stored, indexed, included in the "all" field and has doc values enabled.
func NewGeoShapeFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "geoshape"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	fm.DocValues = true
	return fm
}
// NewIPFieldMapping returns a field mapping of type "IP" that is stored,
// indexed and included in the "all" field. Doc values are left disabled.
func NewIPFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "IP"
	fm.Store = true
	fm.Index = true
	fm.IncludeInAll = true
	return fm
}
// Options translates the boolean settings of the field mapping (Store, Index,
// IncludeTermVectors, DocValues, SkipFreqNorm) into the matching
// index.FieldIndexingOptions bit set.
func (fm *FieldMapping) Options() index.FieldIndexingOptions {
	// each setting paired with the option bit it switches on
	settings := [...]struct {
		enabled bool
		bit     index.FieldIndexingOptions
	}{
		{fm.Store, index.StoreField},
		{fm.Index, index.IndexField},
		{fm.IncludeTermVectors, index.IncludeTermVectors},
		{fm.DocValues, index.DocValues},
		{fm.SkipFreqNorm, index.SkipFreqNorm},
	}

	var opts index.FieldIndexingOptions
	for _, s := range settings {
		if s.enabled {
			opts |= s.bit
		}
	}
	return opts
}
// processString indexes a string value according to the type of the field
// mapping:
//
//   - "text": the value is added to the document as a text field using the
//     analyzer resolved for the field;
//   - "datetime": the value is parsed with the parser named by DateFormat
//     (the index mapping's default parser when DateFormat is empty) and, on
//     success, handed to processTime;
//   - "IP": the value is parsed with net.ParseIP and, on success, handed to
//     processIP.
//
// Values that cannot be parsed, and every other mapping type, add nothing.
func (fm *FieldMapping) processString(propertyValueString string, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	opts := fm.Options()

	if fm.Type == "text" {
		textField := document.NewTextFieldCustom(name, indexes,
			[]byte(propertyValueString), opts, fm.analyzerForField(path, context))
		context.doc.AddField(textField)
		if !fm.IncludeInAll {
			context.excludedFromAll = append(context.excludedFromAll, name)
		}
		return
	}

	if fm.Type == "datetime" {
		parserName := fm.DateFormat
		if parserName == "" {
			parserName = context.im.DefaultDateTimeParser
		}
		parser := context.im.DateTimeParserNamed(parserName)
		if parser == nil {
			return
		}
		when, layout, err := parser.ParseDateTime(propertyValueString)
		if err != nil {
			return
		}
		fm.processTime(when, layout, pathString, path, indexes, context)
		return
	}

	if fm.Type == "IP" {
		if addr := net.ParseIP(propertyValueString); addr != nil {
			fm.processIP(addr, pathString, path, indexes, context)
		}
	}
}
// processFloat64 adds propertyValFloat to the document as a numeric field
// when the field mapping is of type "number"; any other type adds nothing.
// The field name is recorded as excluded from the "all" field when
// IncludeInAll is false.
func (fm *FieldMapping) processFloat64(propertyValFloat float64, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	if fm.Type != "number" {
		return
	}

	context.doc.AddField(
		document.NewNumericFieldWithIndexingOptions(name, indexes, propertyValFloat, fm.Options()))
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processTime adds propertyValueTime to the document as a datetime field when
// the field mapping is of type "datetime"; any other type adds nothing. A
// failure to build the field is logged and the field is left out. The field
// name is recorded as excluded from the "all" field when IncludeInAll is
// false, whether or not the field could be built.
func (fm *FieldMapping) processTime(propertyValueTime time.Time, layout string, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	if fm.Type != "datetime" {
		return
	}

	dateField, err := document.NewDateTimeFieldWithIndexingOptions(
		name, indexes, propertyValueTime, layout, fm.Options())
	if err != nil {
		logger.Printf("could not build date %v", err)
	} else {
		context.doc.AddField(dateField)
	}

	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processBoolean adds propertyValueBool to the document as a boolean field
// when the field mapping is of type "boolean"; any other type adds nothing.
// The field name is recorded as excluded from the "all" field when
// IncludeInAll is false.
func (fm *FieldMapping) processBoolean(propertyValueBool bool, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	if fm.Type != "boolean" {
		return
	}

	context.doc.AddField(
		document.NewBooleanFieldWithIndexingOptions(name, indexes, propertyValueBool, fm.Options()))
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processGeoPoint tries to extract a longitude/latitude pair from
// propertyMightBeGeoPoint with geo.ExtractGeoPoint. When a point is found it
// is added to the document as a geo point field, and the field name is
// recorded as excluded from the "all" field when IncludeInAll is false.
// Nothing is added when no point is found.
func (fm *FieldMapping) processGeoPoint(propertyMightBeGeoPoint interface{}, pathString string, path []string, indexes []uint64, context *walkContext) {
	lon, lat, ok := geo.ExtractGeoPoint(propertyMightBeGeoPoint)
	if !ok {
		return
	}

	name := getFieldName(pathString, path, fm)
	context.doc.AddField(
		document.NewGeoPointFieldWithIndexingOptions(name, indexes, lon, lat, fm.Options()))
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// processIP adds ip to the document as an IP field. The type of the field
// mapping is not checked here. The field name is recorded as excluded from
// the "all" field when IncludeInAll is false.
func (fm *FieldMapping) processIP(ip net.IP, pathString string, path []string, indexes []uint64, context *walkContext) {
	name := getFieldName(pathString, path, fm)
	context.doc.AddField(
		document.NewIPFieldWithIndexingOptions(name, indexes, ip, fm.Options()))
	if fm.IncludeInAll {
		return
	}
	context.excludedFromAll = append(context.excludedFromAll, name)
}
// processGeoShape tries to interpret propertyMightBeGeoShape as a geo shape
// and, when that succeeds, adds it to the document.
//
// The property is first parsed with geo.ParseGeoShapeField; a parse error
// ends processing without adding anything. Geometry collections are extracted
// with geo.ExtractGeometryCollection, circles with geo.ExtractCircle and all
// other shape types with geo.ExtractGeoShapeCoordinates. When extraction
// reports success the resulting field is added, and the field name is
// recorded as excluded from the "all" field when IncludeInAll is false.
func (fm *FieldMapping) processGeoShape(propertyMightBeGeoShape interface{},
	pathString string, path []string, indexes []uint64, context *walkContext,
) {
	coords, shapeType, err := geo.ParseGeoShapeField(propertyMightBeGeoShape)
	if err != nil {
		return
	}

	// geometry collections have their own extraction and field constructor
	if shapeType == geo.GeometryCollectionType {
		shapes, ok := geo.ExtractGeometryCollection(propertyMightBeGeoShape)
		if !ok {
			return
		}
		name := getFieldName(pathString, path, fm)
		opts := fm.Options()
		context.doc.AddField(
			document.NewGeometryCollectionFieldFromShapesWithIndexingOptions(name,
				indexes, shapes, opts))
		if !fm.IncludeInAll {
			context.excludedFromAll = append(context.excludedFromAll, name)
		}
		return
	}

	var (
		single *geojson.GeoShape
		ok     bool
	)
	switch shapeType {
	case geo.CircleType:
		single, ok = geo.ExtractCircle(propertyMightBeGeoShape)
	default:
		single, ok = geo.ExtractGeoShapeCoordinates(coords, shapeType)
	}
	if !ok {
		return
	}

	name := getFieldName(pathString, path, fm)
	opts := fm.Options()
	context.doc.AddField(
		document.NewGeoShapeFieldFromShapeWithIndexingOptions(name,
			indexes, single, opts))
	if !fm.IncludeInAll {
		context.excludedFromAll = append(context.excludedFromAll, name)
	}
}
// analyzerForField resolves the analyzer for a field at path. The name is
// taken from the first non-empty source in this order: the field mapping's
// Analyzer, the default analyzer found by walking the context's document
// mapping along path, and the index mapping's DefaultAnalyzer. The name is
// then looked up with AnalyzerNamed on the index mapping.
func (fm *FieldMapping) analyzerForField(path []string, context *walkContext) analysis.Analyzer {
	name := fm.Analyzer
	if name == "" {
		name = context.dm.defaultAnalyzerName(path)
	}
	if name == "" {
		name = context.im.DefaultAnalyzer
	}
	return context.im.AnalyzerNamed(name)
}
// getFieldName returns the name under which a field is indexed. Without an
// explicit Name on the field mapping this is pathString. With one, the Name
// replaces the last path element: the encoded parent path and the path
// separator are put in front of it when path has more than one element.
func getFieldName(pathString string, path []string, fieldMapping *FieldMapping) string {
	if fieldMapping.Name == "" {
		return pathString
	}
	if len(path) <= 1 {
		// no parent to qualify the name with
		return fieldMapping.Name
	}
	parent := encodePath(path[:len(path)-1])
	return parent + pathSeparator + fieldMapping.Name
}
// UnmarshalJSON decodes a JSON object into the field mapping one key at a
// time. Recognized keys are decoded into the matching struct field, and the
// first decoding error is returned. Unrecognized keys are collected and
// reported as an error only when MappingJSONStrict is set; otherwise they are
// ignored.
func (fm *FieldMapping) UnmarshalJSON(data []byte) error {
	var raw map[string]json.RawMessage
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// destination for each recognized JSON key
	targets := map[string]interface{}{
		"name":                       &fm.Name,
		"type":                       &fm.Type,
		"analyzer":                   &fm.Analyzer,
		"store":                      &fm.Store,
		"index":                      &fm.Index,
		"include_term_vectors":       &fm.IncludeTermVectors,
		"include_in_all":             &fm.IncludeInAll,
		"date_format":                &fm.DateFormat,
		"docvalues":                  &fm.DocValues,
		"skip_freq_norm":             &fm.SkipFreqNorm,
		"dims":                       &fm.Dims,
		"similarity":                 &fm.Similarity,
		"vector_index_optimized_for": &fm.VectorIndexOptimizedFor,
		"synonym_source":             &fm.SynonymSource,
	}

	var invalidKeys []string
	for key, value := range raw {
		target, known := targets[key]
		if !known {
			invalidKeys = append(invalidKeys, key)
			continue
		}
		if err := util.UnmarshalJSON(value, target); err != nil {
			return err
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("field mapping contains invalid keys: %v", invalidKeys)
	}
	return nil
}
================================================
FILE: mapping/index.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding/json"
"fmt"
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
"github.com/blevesearch/bleve/v2/analysis/datetime/optional"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// MappingJSONStrict, when true, makes the UnmarshalJSON implementations of
// the mapping types return an error for JSON keys they do not recognize
// instead of ignoring them.
var MappingJSONStrict = false

// Values given to a new index mapping for settings that are not supplied
// explicitly.
const (
	defaultTypeField      = "_type"
	defaultType           = "_default"
	defaultField          = "_all"
	defaultAnalyzer       = standard.Name
	defaultDateTimeParser = optional.Name
)
// An IndexMappingImpl controls how objects are placed
// into an index.
// First the type of the object is determined.
// Once the type is known, the appropriate
// DocumentMapping is selected by the type.
// If no mapping was determined for that type,
// a DefaultMapping will be used.
type IndexMappingImpl struct {
// TypeMapping holds the document mapping registered for each type name.
TypeMapping map[string]*DocumentMapping `json:"types,omitempty"`
// DefaultMapping is used for types that have no entry in TypeMapping.
DefaultMapping *DocumentMapping `json:"default_mapping"`
TypeField string `json:"type_field"`
DefaultType string `json:"default_type"`
// DefaultAnalyzer names the analyzer used when neither the field mapping
// nor the document mappings on the field's path name one.
DefaultAnalyzer string `json:"default_analyzer"`
// DefaultDateTimeParser names the parser used for datetime fields without
// a DateFormat and for date detection on dynamic string fields.
DefaultDateTimeParser string `json:"default_datetime_parser"`
DefaultSynonymSource string `json:"default_synonym_source,omitempty"`
// ScoringModel, when non-empty, must be a key of
// index.SupportedScoringModels for Validate to succeed.
ScoringModel string `json:"scoring_model,omitempty"`
DefaultField string `json:"default_field"`
// StoreDynamic, IndexDynamic and DocValuesDynamic are copied into the
// field mappings created for dynamic (not explicitly mapped) fields.
StoreDynamic bool `json:"store_dynamic"`
IndexDynamic bool `json:"index_dynamic"`
DocValuesDynamic bool `json:"docvalues_dynamic"`
// CustomAnalysis records the configuration of every custom analysis
// component added through the Add... methods.
CustomAnalysis *customAnalysis `json:"analysis,omitempty"`
// cache defines and resolves named analysis components for this mapping.
cache *registry.Cache
}
// AddCustomCharFilter defines a custom char filter named name from config
// for use in this mapping. The configuration is recorded in CustomAnalysis
// only after the filter has been defined successfully; otherwise the error is
// returned.
func (im *IndexMappingImpl) AddCustomCharFilter(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineCharFilter(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.CharFilters[name] = config
	return nil
}
// AddCustomTokenizer defines a custom tokenizer named name from config for
// use in this mapping. The configuration is recorded in CustomAnalysis only
// after the tokenizer has been defined successfully; otherwise the error is
// returned.
func (im *IndexMappingImpl) AddCustomTokenizer(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineTokenizer(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.Tokenizers[name] = config
	return nil
}
// AddCustomTokenMap defines a custom token map named name from config for
// use in this mapping. The configuration is recorded in CustomAnalysis only
// after the token map has been defined successfully; otherwise the error is
// returned.
func (im *IndexMappingImpl) AddCustomTokenMap(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineTokenMap(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.TokenMaps[name] = config
	return nil
}
// AddCustomTokenFilter defines a custom token filter named name from config
// for use in this mapping. The configuration is recorded in CustomAnalysis
// only after the filter has been defined successfully; otherwise the error is
// returned.
func (im *IndexMappingImpl) AddCustomTokenFilter(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineTokenFilter(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.TokenFilters[name] = config
	return nil
}
// AddCustomAnalyzer defines a custom analyzer named name for use in this
// mapping. The config map must have a "type" string entry, which is used to
// resolve the analyzer constructor. The constructor is invoked with the
// remaining entries and the resulting analyzer is registered in the
// IndexMapping. The configuration is recorded in CustomAnalysis only after
// the analyzer has been defined successfully.
//
// bleve ships with predefined analyzers, such as
// github.com/blevesearch/bleve/v2/analysis/analyzer/custom. They are
// available only if their package is imported by client code. To achieve
// this, use their metadata to fill the configuration entries:
//
//	import (
//	    "github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
//	    "github.com/blevesearch/bleve/v2/analysis/char/html"
//	    "github.com/blevesearch/bleve/v2/analysis/token/lowercase"
//	    "github.com/blevesearch/bleve/v2/analysis/tokenizer/unicode"
//	)
//
//	m := bleve.NewIndexMapping()
//	err := m.AddCustomAnalyzer("html", map[string]interface{}{
//	    "type": custom.Name,
//	    "char_filters": []string{
//	        html.Name,
//	    },
//	    "tokenizer":     unicode.Name,
//	    "token_filters": []string{
//	        lowercase.Name,
//	        ...
//	    },
//	})
func (im *IndexMappingImpl) AddCustomAnalyzer(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineAnalyzer(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.Analyzers[name] = config
	return nil
}
// AddCustomDateTimeParser defines a custom date time parser named name from
// config for use in this mapping. The configuration is recorded in
// CustomAnalysis only after the parser has been defined successfully;
// otherwise the error is returned.
func (im *IndexMappingImpl) AddCustomDateTimeParser(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineDateTimeParser(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.DateTimeParsers[name] = config
	return nil
}
// AddSynonymSource defines a synonym source named name from config for use
// in this mapping. The configuration is recorded in CustomAnalysis only
// after the source has been defined successfully; otherwise the error is
// returned.
func (im *IndexMappingImpl) AddSynonymSource(name string, config map[string]interface{}) error {
	if _, err := im.cache.DefineSynonymSource(name, config); err != nil {
		return err
	}
	im.CustomAnalysis.SynonymSources[name] = config
	return nil
}
// NewIndexMapping creates an index mapping that uses all the default indexing
// rules: an empty set of type mappings, a fresh default document mapping, the
// package default type field, type, analyzer, date time parser and field, the
// current package-level dynamic settings, an empty custom analysis
// configuration and a new registry cache.
func NewIndexMapping() *IndexMappingImpl {
	im := new(IndexMappingImpl)

	im.TypeMapping = make(map[string]*DocumentMapping)
	im.DefaultMapping = NewDocumentMapping()

	im.TypeField = defaultTypeField
	im.DefaultType = defaultType
	im.DefaultAnalyzer = defaultAnalyzer
	im.DefaultDateTimeParser = defaultDateTimeParser
	im.DefaultField = defaultField

	im.IndexDynamic = IndexDynamic
	im.StoreDynamic = StoreDynamic
	im.DocValuesDynamic = DocValuesDynamic

	im.CustomAnalysis = newCustomAnalysis()
	im.cache = registry.NewCache()
	return im
}
// Validate walks the entire mapping and returns the first problem it finds.
// It checks, in order, that
//
//   - the default analyzer and the default date time parser can be resolved,
//     as can the default synonym source when one is set;
//   - the default mapping is not marked Nested and validates;
//   - no type mapping is marked Nested and each one validates;
//   - the scoring model, when set, is one of index.SupportedScoringModels.
//
// A single field alias context is shared by all document mappings so that
// field alias conflicts are detected across the whole mapping.
func (im *IndexMappingImpl) Validate() error {
	if _, err := im.cache.AnalyzerNamed(im.DefaultAnalyzer); err != nil {
		return err
	}
	if _, err := im.cache.DateTimeParserNamed(im.DefaultDateTimeParser); err != nil {
		return err
	}
	if im.DefaultSynonymSource != "" {
		if _, err := im.cache.SynonymSourceNamed(im.DefaultSynonymSource); err != nil {
			return err
		}
	}

	// maps fully qualified field names to their FieldMapping; every
	// DocumentMapping validated below checks against and extends it
	aliases := make(map[string]*FieldMapping)

	// the nested property must not be set on the top-level default mapping
	if im.DefaultMapping.Nested {
		return fmt.Errorf("default mapping cannot be nested")
	}
	if err := im.DefaultMapping.Validate(im.cache, []string{}, aliases); err != nil {
		return err
	}

	for typeName, typeMapping := range im.TypeMapping {
		// the nested property must not be set on top-level type mappings
		if typeMapping.Nested {
			return fmt.Errorf("type mapping named: %s cannot be nested", typeName)
		}
		if err := typeMapping.Validate(im.cache, []string{}, aliases); err != nil {
			return err
		}
	}

	// an empty scoring model is always accepted
	if im.ScoringModel == "" {
		return nil
	}
	if _, supported := index.SupportedScoringModels[im.ScoringModel]; !supported {
		return fmt.Errorf("unsupported scoring model: %s", im.ScoringModel)
	}
	return nil
}
// AddDocumentMapping registers dm as the document mapping for documents of
// type doctype, replacing any mapping previously registered for that type.
func (im *IndexMappingImpl) AddDocumentMapping(doctype string, dm *DocumentMapping) {
	byType := im.TypeMapping
	byType[doctype] = dm
}
// mappingForType returns the document mapping registered for docType, or the
// default mapping when none is registered or the registered one is nil.
func (im *IndexMappingImpl) mappingForType(docType string) *DocumentMapping {
	if registered := im.TypeMapping[docType]; registered != nil {
		return registered
	}
	return im.DefaultMapping
}
// UnmarshalJSON decodes an index mapping from JSON. Every setting is first
// reset to its default so that keys omitted from the input keep sensible
// values, then each recognized top-level key is decoded into the matching
// field. Unrecognized keys are collected and, when MappingJSONStrict is
// enabled, reported as an error. Finally the custom analysis components are
// registered with the mapping.
func (im *IndexMappingImpl) UnmarshalJSON(data []byte) error {
	var raw map[string]json.RawMessage
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// defaults for anything the input leaves out
	im.cache = registry.NewCache()
	im.CustomAnalysis = newCustomAnalysis()
	im.TypeField = defaultTypeField
	im.DefaultType = defaultType
	im.DefaultAnalyzer = defaultAnalyzer
	im.DefaultDateTimeParser = defaultDateTimeParser
	im.DefaultField = defaultField
	im.DefaultMapping = NewDocumentMapping()
	im.TypeMapping = make(map[string]*DocumentMapping)
	im.StoreDynamic = StoreDynamic
	im.IndexDynamic = IndexDynamic
	im.DocValuesDynamic = DocValuesDynamic

	// destination for each recognized top-level key
	destinations := map[string]interface{}{
		"analysis":                &im.CustomAnalysis,
		"type_field":              &im.TypeField,
		"default_type":            &im.DefaultType,
		"default_analyzer":        &im.DefaultAnalyzer,
		"default_datetime_parser": &im.DefaultDateTimeParser,
		"default_synonym_source":  &im.DefaultSynonymSource,
		"default_field":           &im.DefaultField,
		"default_mapping":         &im.DefaultMapping,
		"types":                   &im.TypeMapping,
		"store_dynamic":           &im.StoreDynamic,
		"index_dynamic":           &im.IndexDynamic,
		"docvalues_dynamic":       &im.DocValuesDynamic,
		"scoring_model":           &im.ScoringModel,
	}

	var invalidKeys []string
	for key, value := range raw {
		dest, known := destinations[key]
		if !known {
			invalidKeys = append(invalidKeys, key)
			continue
		}
		if err := util.UnmarshalJSON(value, dest); err != nil {
			return err
		}
	}

	if MappingJSONStrict && len(invalidKeys) > 0 {
		return fmt.Errorf("index mapping contains invalid keys: %v", invalidKeys)
	}

	return im.CustomAnalysis.registerAll(im)
}
// determineType works out the document type name for data. The sources are
// consulted in order: the bleveClassifier interface, the Classifier
// interface, a string found in data at the path named by TypeField, and
// finally the mapping's DefaultType.
func (im *IndexMappingImpl) determineType(data interface{}) string {
	// cases are tried top to bottom, so bleveClassifier wins over Classifier
	switch typed := data.(type) {
	case bleveClassifier:
		return typed.BleveType()
	case Classifier:
		return typed.Type()
	}

	// neither interface is implemented; look for the type inside the data
	if name, found := mustString(lookupPropertyPath(data, im.TypeField)); found {
		return name
	}

	return im.DefaultType
}
// MapDocument fills doc with fields derived from data, using the document
// mapping selected for data's type. When that mapping is disabled the
// document is left untouched. Otherwise the data is walked, a composite
// "_all" field is added unless a mapping for "_all" exists and is disabled,
// and the document is marked as indexed. The returned error is always nil.
func (im *IndexMappingImpl) MapDocument(doc *document.Document, data interface{}) error {
	typeMapping := im.mappingForType(im.determineType(data))
	if !typeMapping.Enabled {
		return nil
	}

	ctx := im.newWalkContext(doc, typeMapping)
	typeMapping.walkDocument(data, []string{}, []uint64{}, ctx)

	// skip the composite field only when "_all" was explicitly disabled
	allMapping, _ := typeMapping.documentMappingForPath("_all")
	allDisabled := allMapping != nil && !allMapping.Enabled
	if !allDisabled {
		allField := document.NewCompositeFieldWithIndexingOptions("_all", true, []string{}, ctx.excludedFromAll, index.IndexField|index.IncludeTermVectors)
		doc.AddField(allField)
	}

	doc.SetIndexed()
	return nil
}
// MapSynonymDocument adds one synonym field to doc for every synonym source
// whose collection equals the given collection. Each field is named after its
// synonym source and is built with that source's analyzer. An error is
// returned when a matching source refers to an analyzer that cannot be
// resolved, or when the visitor itself fails.
func (im *IndexMappingImpl) MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error {
	return im.SynonymSourceVisitor(func(name string, item analysis.SynonymSource) error {
		// sources belonging to other collections are ignored
		if item.Collection() != collection {
			return nil
		}
		sourceAnalyzer := im.AnalyzerNamed(item.Analyzer())
		if sourceAnalyzer == nil {
			return fmt.Errorf("unknown analyzer named: %s", item.Analyzer())
		}
		// the field carries the name of the synonym source
		doc.AddField(document.NewSynonymField(name, sourceAnalyzer, input, synonyms))
		return nil
	})
}
// walkContext bundles the state that is passed along while a document's data
// is walked by MapDocument.
type walkContext struct {
	// doc is the document being populated.
	doc *document.Document
	// im is the index mapping that started the walk.
	im *IndexMappingImpl
	// dm is the document mapping selected for the data's type.
	dm *DocumentMapping
	// excludedFromAll is handed to the composite "_all" field when it is
	// created; newWalkContext seeds it with "_id".
	excludedFromAll []string
}
// newWalkContext creates the context used to walk a document's data for the
// given document and document mapping. The exclusion list for the "_all"
// field starts out containing only "_id".
func (im *IndexMappingImpl) newWalkContext(doc *document.Document, dm *DocumentMapping) *walkContext {
	ctx := new(walkContext)
	ctx.doc = doc
	ctx.im = im
	ctx.dm = dm
	ctx.excludedFromAll = []string{"_id"}
	return ctx
}
// AnalyzerNameForPath picks the analyzer name to use for a field when only
// its path is known. Candidates are tried in this order, and the first
// non-empty name wins:
//
//  1. an explicit analyzer on a field mapping at path in any type mapping
//  2. an explicit analyzer on the first field mapped at path in the default
//     mapping
//  3. the default analyzer of any enabled type mapping for the decoded path
//  4. the default analyzer of the default mapping, when it is enabled
//  5. the index-wide DefaultAnalyzer
func (im *IndexMappingImpl) AnalyzerNameForPath(path string) string {
	// explicit analyzers declared in the type mappings
	for _, typeMapping := range im.TypeMapping {
		if explicit := typeMapping.analyzerNameForPath(path); explicit != "" {
			return explicit
		}
	}

	// explicit analyzer declared in the default mapping
	atPath, _ := im.DefaultMapping.documentMappingForPath(path)
	if atPath != nil && len(atPath.Fields) > 0 && atPath.Fields[0].Analyzer != "" {
		return atPath.Fields[0].Analyzer
	}

	// default analyzers configured along the path in the type mappings
	components := decodePath(path)
	for _, typeMapping := range im.TypeMapping {
		if !typeMapping.Enabled {
			continue
		}
		if inherited := typeMapping.defaultAnalyzerName(components); inherited != "" {
			return inherited
		}
	}

	// default analyzer configured along the path in the default mapping
	if im.DefaultMapping.Enabled {
		if inherited := im.DefaultMapping.defaultAnalyzerName(components); inherited != "" {
			return inherited
		}
	}

	return im.DefaultAnalyzer
}
// AnalyzerNamed returns the analyzer registered under name. When the lookup
// in the registry cache fails, the failure is logged and nil is returned.
func (im *IndexMappingImpl) AnalyzerNamed(name string) analysis.Analyzer {
	found, err := im.cache.AnalyzerNamed(name)
	if err == nil {
		return found
	}
	logger.Printf("error using analyzer named: %s", name)
	return nil
}
// DateTimeParserNamed returns the date/time parser registered under name. An
// empty name selects the mapping's DefaultDateTimeParser. When the lookup in
// the registry cache fails, the failure is logged and nil is returned.
func (im *IndexMappingImpl) DateTimeParserNamed(name string) analysis.DateTimeParser {
	parserName := name
	if parserName == "" {
		parserName = im.DefaultDateTimeParser
	}
	parser, err := im.cache.DateTimeParserNamed(parserName)
	if err == nil {
		return parser
	}
	logger.Printf("error using datetime parser named: %s", parserName)
	return nil
}
// AnalyzeText runs text through the analyzer registered under analyzerName
// and returns the resulting token stream. The lookup error is returned when
// the analyzer cannot be obtained from the registry cache.
func (im *IndexMappingImpl) AnalyzeText(analyzerName string, text []byte) (analysis.TokenStream, error) {
	chosen, err := im.cache.AnalyzerNamed(analyzerName)
	if err == nil {
		return chosen.Analyze(text), nil
	}
	return nil, err
}
// FieldAnalyzer returns the name of the analyzer used on a field. It simply
// delegates to AnalyzerNameForPath, treating field as the path.
func (im *IndexMappingImpl) FieldAnalyzer(field string) string {
	return im.AnalyzerNameForPath(field)
}
// FieldMappingForPath returns a copy of the field mapping that describes
// 'path'. The type mappings are searched first, then the default mapping.
// A zero-value FieldMapping is returned when nothing describes the path.
func (im *IndexMappingImpl) FieldMappingForPath(path string) FieldMapping {
	// ranging over a nil TypeMapping simply performs no iterations
	for _, typeMapping := range im.TypeMapping {
		if described := typeMapping.fieldDescribedByPath(path); described != nil {
			return *described
		}
	}

	if described := im.DefaultMapping.fieldDescribedByPath(path); described != nil {
		return *described
	}

	return FieldMapping{}
}
// DefaultSearchField returns the mapping's DefaultField. It is a thin
// accessor that exists so IndexMappingImpl satisfies the IndexMapping
// interface.
func (im *IndexMappingImpl) DefaultSearchField() string {
	return im.DefaultField
}
// SynonymSourceNamed returns the synonym source registered under name. When
// the lookup in the registry cache fails, the failure is logged and nil is
// returned.
func (im *IndexMappingImpl) SynonymSourceNamed(name string) analysis.SynonymSource {
	source, err := im.cache.SynonymSourceNamed(name)
	if err == nil {
		return source
	}
	logger.Printf("error using synonym source named: %s", name)
	return nil
}
// SynonymSourceForPath picks the synonym source name to use for a field when
// only its path is known. Candidates are tried in this order, and the first
// non-empty name wins:
//
//  1. an explicit synonym source on a field mapping at path in any type
//     mapping
//  2. an explicit synonym source on the first field mapped at path in the
//     default mapping
//  3. the default synonym source of any enabled type mapping for the decoded
//     path
//  4. the default synonym source of the default mapping, when it is enabled
//  5. the index-wide DefaultSynonymSource
func (im *IndexMappingImpl) SynonymSourceForPath(path string) string {
	// explicit synonym sources declared in the type mappings
	for _, typeMapping := range im.TypeMapping {
		if explicit := typeMapping.synonymSourceForPath(path); explicit != "" {
			return explicit
		}
	}

	// explicit synonym source declared in the default mapping
	atPath, _ := im.DefaultMapping.documentMappingForPath(path)
	if atPath != nil && len(atPath.Fields) > 0 && atPath.Fields[0].SynonymSource != "" {
		return atPath.Fields[0].SynonymSource
	}

	// default synonym sources configured along the path in the type mappings
	components := decodePath(path)
	for _, typeMapping := range im.TypeMapping {
		if !typeMapping.Enabled {
			continue
		}
		if inherited := typeMapping.defaultSynonymSource(components); inherited != "" {
			return inherited
		}
	}

	// default synonym source configured along the path in the default mapping
	if im.DefaultMapping.Enabled {
		if inherited := im.DefaultMapping.defaultSynonymSource(components); inherited != "" {
			return inherited
		}
	}

	return im.DefaultSynonymSource
}
// SynonymCount returns the number of synonym sources defined in the mapping,
// i.e. the number of entries in CustomAnalysis.SynonymSources.
func (im *IndexMappingImpl) SynonymCount() int {
	return len(im.CustomAnalysis.SynonymSources)
}
// SynonymSourceVisitor hands the visitor to the registry cache's synonym
// sources so it can iterate over all of them, and returns whatever error
// that visit reports.
func (im *IndexMappingImpl) SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error {
	return im.cache.SynonymSources.VisitSynonymSources(visitor)
}
// buildNestedPrefixes walks the properties of the default mapping and of
// every type mapping (enabled ones only) and returns a map from the joined
// path of each nested sub-document mapping to its nesting depth. The depth
// counts how many nested boundaries lie on the path, including the entry
// itself; properties that are not nested inherit the depth of their parent
// and are not recorded.
func (im *IndexMappingImpl) buildNestedPrefixes() map[string]int {
	depths := make(map[string]int)

	var descend func(parent *DocumentMapping, parentPath []string, parentDepth int)
	descend = func(parent *DocumentMapping, parentPath []string, parentDepth int) {
		for propName, child := range parent.Properties {
			childPath := append(parentPath, propName)
			childDepth := parentDepth
			if child.Nested {
				// crossing a nested boundary: one level deeper, and recorded
				childDepth++
				depths[strings.Join(childPath, pathSeparator)] = childDepth
			}
			descend(child, childPath, childDepth)
		}
	}

	// the root of every mapping is at depth 0
	if im.DefaultMapping != nil && im.DefaultMapping.Enabled {
		descend(im.DefaultMapping, []string{}, 0)
	}
	for _, typeMapping := range im.TypeMapping {
		if !typeMapping.Enabled {
			continue
		}
		descend(typeMapping, []string{}, 0)
	}

	return depths
}
// NestedDepth returns the nested depth information for the given field set,
// as computed by the registry cache's nested prefixes. The prefixes are
// initialized from buildNestedPrefixes via InitOnce before they are queried.
// When the cache or its nested prefixes are missing, 0, 0 is returned.
func (im *IndexMappingImpl) NestedDepth(fs search.FieldSet) (int, int) {
	if im.cache != nil && im.cache.NestedPrefixes != nil {
		im.cache.NestedPrefixes.InitOnce(im.buildNestedPrefixes)
		return im.cache.NestedPrefixes.NestedDepth(fs)
	}
	return 0, 0
}
// CountNested returns the nested count reported by the registry cache's
// nested prefixes. The prefixes are initialized from buildNestedPrefixes via
// InitOnce before they are queried. When the cache or its nested prefixes
// are missing, 0 is returned.
func (im *IndexMappingImpl) CountNested() int {
	if im.cache != nil && im.cache.NestedPrefixes != nil {
		im.cache.NestedPrefixes.InitOnce(im.buildNestedPrefixes)
		return im.cache.NestedPrefixes.CountNested()
	}
	return 0
}
// IntersectsPrefix reports what the registry cache's nested prefixes return
// for the given field set. The prefixes are initialized from
// buildNestedPrefixes via InitOnce before they are queried. When the cache or
// its nested prefixes are missing, false is returned.
func (im *IndexMappingImpl) IntersectsPrefix(fs search.FieldSet) bool {
	if im.cache != nil && im.cache.NestedPrefixes != nil {
		im.cache.NestedPrefixes.InitOnce(im.buildNestedPrefixes)
		return im.cache.NestedPrefixes.IntersectsPrefix(fs)
	}
	return false
}
================================================
FILE: mapping/mapping.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"io"
"log"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/search"
)
// A Classifier is an interface describing any object which knows how to
// identify its own type. Alternatively, if a struct already has a Type
// field or method in conflict, one can implement a BleveType method instead.
type Classifier interface {
	// Type returns the name of the object's document type.
	Type() string
}
// A bleveClassifier is an interface describing any object which knows how
// to identify its own type. This is introduced as an alternative to the
// Classifier interface, whose Type method often has naming conflicts with
// existing structures.
type bleveClassifier interface {
	// BleveType returns the name of the object's document type.
	BleveType() string
}
// logger receives the mapping package's diagnostic messages. It starts out
// writing to io.Discard, so nothing is emitted until SetLog installs another
// logger.
var logger = log.New(io.Discard, "bleve mapping ", log.LstdFlags)

// SetLog sets the logger used for logging.
// By default log messages are sent to io.Discard.
//
// Passing nil restores that default discard logger, so the package-level
// logger is never left nil (logging through a nil *log.Logger would panic).
func SetLog(l *log.Logger) {
	if l == nil {
		l = log.New(io.Discard, "bleve mapping ", log.LstdFlags)
	}
	logger = l
}
// An IndexMapping describes how documents are turned into indexable fields
// and which analysis components apply to them.
type IndexMapping interface {
	// MapDocument populates doc with fields derived from data.
	MapDocument(doc *document.Document, data interface{}) error
	// Validate returns an error when the mapping is not usable.
	Validate() error

	// DateTimeParserNamed returns the date/time parser with the given name.
	DateTimeParserNamed(name string) analysis.DateTimeParser

	// DefaultSearchField returns the name of the default search field.
	DefaultSearchField() string

	// AnalyzerNameForPath returns the name of the analyzer to use for the
	// field at path.
	AnalyzerNameForPath(path string) string
	// AnalyzerNamed returns the analyzer with the given name.
	AnalyzerNamed(name string) analysis.Analyzer

	// FieldMappingForPath returns the mapping for the field at path.
	FieldMappingForPath(path string) FieldMapping
}
// A SynonymMapping extends the IndexMapping interface to provide
// additional methods for working with synonyms.
type SynonymMapping interface {
	IndexMapping

	// MapSynonymDocument populates doc with synonym fields for the given
	// collection, built from the input terms and their synonyms.
	MapSynonymDocument(doc *document.Document, collection string, input []string, synonyms []string) error

	// SynonymSourceForPath returns the name of the synonym source to use for
	// the field at path.
	SynonymSourceForPath(path string) string

	// SynonymSourceNamed returns the synonym source with the given name.
	SynonymSourceNamed(name string) analysis.SynonymSource

	// SynonymCount returns the number of synonym sources in the mapping.
	SynonymCount() int

	// SynonymSourceVisitor lets visitor iterate over the synonym sources.
	SynonymSourceVisitor(visitor analysis.SynonymSourceVisitor) error
}
// A NestedMapping provides methods for working with nested object mappings.
// It is a standalone interface: it does not embed IndexMapping, so a value
// that needs both sets of methods must satisfy both interfaces.
type NestedMapping interface {
	// NestedDepth returns two values:
	//   - common: the highest nested level that is common to all given field paths,
	//     if 0 then there is no common nested level among the given field paths
	//   - max: the highest nested level that applies to at least one of the given field paths
	//     if 0 then none of the given field paths are nested
	NestedDepth(fieldPaths search.FieldSet) (int, int)

	// IntersectsPrefix returns true if any of the given
	// field paths intersect with a known nested prefix
	IntersectsPrefix(fieldPaths search.FieldSet) bool

	// CountNested returns the number of nested object mappings
	CountNested() int
}
================================================
FILE: mapping/mapping_no_vectors.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !vectors
// +build !vectors
package mapping
// NewVectorFieldMapping is the stub used when the package is built without
// the "vectors" build tag. It always returns nil.
func NewVectorFieldMapping() *FieldMapping {
	return nil
}
// NewVectorBase64FieldMapping is the stub used when the package is built
// without the "vectors" build tag. It always returns nil.
func NewVectorBase64FieldMapping() *FieldMapping {
	return nil
}
// processVector is the stub used when the package is built without the
// "vectors" build tag. It ignores all of its arguments and always returns
// false.
// NOTE(review): presumably false tells the caller the property was not
// handled as a vector - confirm against the vectors build of this method.
func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) bool {
	return false
}
// processVectorBase64 is the stub used when the package is built without the
// "vectors" build tag. It ignores all of its arguments and does nothing.
func (fm *FieldMapping) processVectorBase64(propertyMightBeVector interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) {
}
// -----------------------------------------------------------------------------
// document validation functions

// validateFieldMapping validates a single field mapping when the package is
// built without the "vectors" build tag. In this build only the field's type
// is checked, via validateFieldType; path and fieldAliasCtx are not used.
func validateFieldMapping(field *FieldMapping, path []string,
	fieldAliasCtx map[string]*FieldMapping) error {
	return validateFieldType(field)
}
================================================
FILE: mapping/mapping_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"encoding/json"
"fmt"
index "github.com/blevesearch/bleve_index_api"
"reflect"
"strconv"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/exception"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/regexp"
"github.com/blevesearch/bleve/v2/document"
)
// mappingSource is the JSON form of an index mapping with two document
// types: "beer", which has a single text field "name" using the "standard"
// analyzer, and an empty "brewery" type. TestUnmarshalMappingJSON expects
// unmarshaling it to produce the same mapping that buildMapping constructs.
var mappingSource = []byte(`{
"types": {
"beer": {
"properties": {
"name": {
"fields": [
{
"name": "name",
"type": "text",
"analyzer": "standard",
"store": true,
"index": true,
"include_term_vectors": true,
"include_in_all": true,
"docvalues": true
}
]
}
}
},
"brewery": {
}
},
"type_field": "_type",
"default_type": "_default"
}`)
// buildMapping constructs, in code, an index mapping with a "beer" document
// type (one text field "name" analyzed with "standard") and an empty
// "brewery" document type.
func buildMapping() IndexMapping {
	nameField := NewTextFieldMapping()
	nameField.Name = "name"
	nameField.Analyzer = "standard"

	beer := NewDocumentMapping()
	beer.AddFieldMappingsAt("name", nameField)

	brewery := NewDocumentMapping()

	im := NewIndexMapping()
	im.AddDocumentMapping("beer", beer)
	im.AddDocumentMapping("brewery", brewery)
	return im
}
// TestUnmarshalMappingJSON verifies that unmarshaling mappingSource yields a
// mapping deeply equal to the one buildMapping constructs in code.
func TestUnmarshalMappingJSON(t *testing.T) {
	want := buildMapping()

	var got IndexMappingImpl
	if err := json.Unmarshal(mappingSource, &got); err != nil {
		t.Fatal(err)
	}

	if reflect.DeepEqual(&got, want) {
		return
	}
	t.Errorf("expected %#v,\n got %#v", want, &got)
}
// TestMappingStructWithJSONTags maps a struct with one json-tagged field and
// one untagged field, and expects exactly two document fields: one named by
// the tag and one named after the Go field.
func TestMappingStructWithJSONTags(t *testing.T) {
	im := buildMapping()

	input := struct {
		NoJSONTag string
		Name      string `json:"name"`
	}{
		Name: "marty",
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, input); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["name"] {
		t.Errorf("expected to find field named 'name'")
	}
	if !seen["NoJSONTag"] {
		t.Errorf("expected to find field named 'NoJSONTag'")
	}
	if total := len(doc.Fields); total != 2 {
		t.Errorf("expected to find 2 find, found %d", total)
	}
}
// TestMappingStructWithJSONTagsOneDisabled maps a struct in which one field
// carries the json tag "-", and expects only the two remaining fields to
// appear in the document.
func TestMappingStructWithJSONTagsOneDisabled(t *testing.T) {
	im := buildMapping()

	input := struct {
		Name      string `json:"name"`
		Title     string `json:"-"`
		NoJSONTag string
	}{
		Name: "marty",
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, input); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["name"] {
		t.Errorf("expected to find field named 'name'")
	}
	if !seen["NoJSONTag"] {
		t.Errorf("expected to find field named 'NoJSONTag'")
	}
	if total := len(doc.Fields); total != 2 {
		t.Errorf("expected to find 2 find, found %d", total)
	}
}
// TestMappingStructWithAlternateTags sets the default mapping's StructTagKey
// to "bleve" and expects field names to be taken from the bleve struct tag,
// with untagged fields keeping their Go names.
func TestMappingStructWithAlternateTags(t *testing.T) {
	im := buildMapping()
	impl := im.(*IndexMappingImpl)
	impl.DefaultMapping.StructTagKey = "bleve"

	input := struct {
		NoBLEVETag string
		Name       string `bleve:"name"`
	}{
		Name: "marty",
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, input); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["name"] {
		t.Errorf("expected to find field named 'name'")
	}
	if !seen["NoBLEVETag"] {
		t.Errorf("expected to find field named 'NoBLEVETag'")
	}
	if total := len(doc.Fields); total != 2 {
		t.Errorf("expected to find 2 find, found %d", total)
	}
}
// TestMappingStructWithAlternateTagsTwoDisabled sets the default mapping's
// StructTagKey to "bleve" and expects the json tags to be ignored: the two
// fields tagged bleve:"-" are dropped, while the other two appear.
func TestMappingStructWithAlternateTagsTwoDisabled(t *testing.T) {
	im := buildMapping()
	impl := im.(*IndexMappingImpl)
	impl.DefaultMapping.StructTagKey = "bleve"

	input := struct {
		Name       string `json:"-"     bleve:"name"`
		Title      string `json:"-"     bleve:"-"`
		NoBLEVETag string `json:"-"`
		Extra      string `json:"extra" bleve:"-"`
	}{
		Name: "marty",
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, input); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["name"] {
		t.Errorf("expected to find field named 'name'")
	}
	if !seen["NoBLEVETag"] {
		t.Errorf("expected to find field named 'NoBLEVETag'")
	}
	if total := len(doc.Fields); total != 2 {
		t.Errorf("expected to find 2 find, found %d", total)
	}
}
// TestMappingStructWithPointerToString maps a struct whose only field is a
// pointer to a string and expects a single document field named "Name".
func TestMappingStructWithPointerToString(t *testing.T) {
	im := buildMapping()

	value := "marty"
	input := struct {
		Name *string
	}{
		Name: &value,
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, input); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["Name"] {
		t.Errorf("expected to find field named 'Name'")
	}
	if total := len(doc.Fields); total != 1 {
		t.Errorf("expected to find 1 find, found %d", total)
	}
}
// TestMappingJSONWithNull maps decoded JSON containing a null value and
// expects the null property to produce no field, leaving only "name".
func TestMappingJSONWithNull(t *testing.T) {
	im := NewIndexMapping()

	var decoded interface{}
	if err := json.Unmarshal([]byte(`{"name":"marty", "age": null}`), &decoded); err != nil {
		t.Fatal(err)
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, decoded); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["name"] {
		t.Errorf("expected to find field named 'name'")
	}
	if total := len(doc.Fields); total != 1 {
		t.Errorf("expected to find 1 find, found %d", total)
	}
}
// TestMappingForPath checks that AnalyzerNameForPath finds explicit analyzers
// for a plain field, for a field mapping with a custom name, and for a field
// inside a sub-document.
func TestMappingForPath(t *testing.T) {
	enField := NewTextFieldMapping()
	enField.Analyzer = "en"

	typeA := NewDocumentMapping()
	typeA.AddFieldMappingsAt("name", enField)

	customField := NewTextFieldMapping()
	customField.Analyzer = "xyz"
	customField.Name = "nameCustom"

	child := NewDocumentMapping()
	descField := NewTextFieldMapping()
	descField.Analyzer = "analyzerx"
	child.AddFieldMappingsAt("desc", descField)

	typeA.AddFieldMappingsAt("author", enField, customField)
	typeA.AddSubDocumentMapping("child", child)

	im := NewIndexMapping()
	im.AddDocumentMapping("a", typeA)

	checks := []struct {
		path string
		want string
	}{
		{path: "name", want: enField.Analyzer},
		{path: "nameCustom", want: customField.Analyzer},
		{path: "child.desc", want: descField.Analyzer},
	}
	for _, check := range checks {
		if got := im.AnalyzerNameForPath(check.path); got != check.want {
			t.Errorf("expected '%s' got '%s'", check.want, got)
		}
	}
}
// TestMappingWithTokenizerDeps verifies that custom tokenizers which depend
// on other custom tokenizers are registered successfully regardless of map
// iteration order, and that a dependency on an undefined tokenizer is
// reported as an error.
func TestMappingWithTokenizerDeps(t *testing.T) {
	tokNoDeps := map[string]interface{}{
		"type":   regexp.Name,
		"regexp": "",
	}

	tokDepsL1 := map[string]interface{}{
		"type":       exception.Name,
		"tokenizer":  "a",
		"exceptions": []string{".*"},
	}

	// this tests a 1-level dependency
	// it is run 100 times to increase the
	// likelihood that it fails along the way
	// (depends on key order iteration in map)
	for i := 0; i < 100; i++ {
		m := NewIndexMapping()
		ca := customAnalysis{
			Tokenizers: map[string]map[string]interface{}{
				"a": tokNoDeps,
				"b": tokDepsL1,
			},
		}
		err := ca.registerAll(m)
		if err != nil {
			t.Fatal(err)
		}
	}

	tokDepsL2 := map[string]interface{}{
		"type":       "exception",
		"tokenizer":  "b",
		"exceptions": []string{".*"},
	}

	// now test a second-level dependency
	for i := 0; i < 100; i++ {
		m := NewIndexMapping()
		ca := customAnalysis{
			Tokenizers: map[string]map[string]interface{}{
				"a": tokNoDeps,
				"b": tokDepsL1,
				"c": tokDepsL2,
			},
		}
		err := ca.registerAll(m)
		if err != nil {
			t.Fatal(err)
		}
	}

	tokUnsatisfied := map[string]interface{}{
		"type":      "exception",
		"tokenizer": "e",
	}

	// now make sure an unsatisfied dep still
	// results in an error
	m := NewIndexMapping()
	ca := customAnalysis{
		Tokenizers: map[string]map[string]interface{}{
			"a": tokNoDeps,
			"b": tokDepsL1,
			"c": tokDepsL2,
			"d": tokUnsatisfied,
		},
	}
	err := ca.registerAll(m)
	if err == nil {
		// err is nil on this path, so passing it to Fatal would only print
		// "<nil>"; state what was expected instead
		t.Fatal("expected an error for tokenizer 'd' depending on undefined tokenizer 'e', got nil")
	}
}
func TestEnablingDisablingStoringDynamicFields(t *testing.T) {
// first verify that with system defaults, dynamic field is stored
data := map[string]interface{}{
"name": "bleve",
}
doc := document.NewDocument("x")
mapping := NewIndexMapping()
err := mapping.MapDocument(doc, data)
if err != nil {
t.Fatal(err)
}
for _, field := range doc.Fields {
if field.Name() == "name" && !field.Options().IsStored() {
t.Errorf("expected field 'name' to be stored, isn't")
}
}
// now change system level defaults, verify dynamic field is not stored
StoreDynamic = false
defer func() {
StoreDynamic = true
}()
mapping = NewIndexMapping()
doc = document.NewDocument("y")
err = mapping.MapDocument(doc, data)
if err != nil {
t.Fatal(err)
}
for _, field := range doc.Fields {
if field.Name() == "name" && field.Options().IsStored() {
t.Errorf("expected field 'name' to be not stored, is")
}
}
// now override the system level defaults inside the index mapping
mapping = NewIndexMapping()
mapping.StoreDynamic = true
doc = document.NewDocument("y")
err = mapping.MapDocument(doc, data)
if err != nil {
t.Fatal(err)
}
for _, field := range doc.Fields {
if field.Name() == "name" && !field.Options().IsStored() {
t.Errorf("expected field 'name' to be stored, isn't")
}
}
}
// TestMappingBool maps a struct holding a bool and a pointer to a bool and
// expects both to show up as document fields.
func TestMappingBool(t *testing.T) {
	boolField := NewBooleanFieldMapping()
	docType := NewDocumentMapping()
	docType.AddFieldMappingsAt("prop", boolField)
	im := NewIndexMapping()
	im.AddDocumentMapping("doc", docType)

	pointedTo := false
	input := struct {
		Prop  bool  `json:"prop"`
		PProp *bool `json:"pprop"`
	}{
		Prop:  true,
		PProp: &pointedTo,
	}

	doc := document.NewDocument("1")
	if err := im.MapDocument(doc, input); err != nil {
		t.Fatal(err)
	}

	// record every field name produced by the mapping
	seen := make(map[string]bool, len(doc.Fields))
	for _, field := range doc.Fields {
		seen[field.Name()] = true
	}

	if !seen["prop"] {
		t.Errorf("expected to find bool field named 'prop'")
	}
	if !seen["pprop"] {
		t.Errorf("expected to find pointer to bool field named 'pprop'")
	}
	if total := len(doc.Fields); total != 2 {
		t.Errorf("expected to find 2 fields, found %d", total)
	}
}
// TestDisableDefaultMapping disables the default mapping and expects mapping
// a document to produce no fields at all.
func TestDisableDefaultMapping(t *testing.T) {
	im := NewIndexMapping()
	im.DefaultMapping.Enabled = false

	input := map[string]string{
		"name": "bleve",
	}

	doc := document.NewDocument("x")
	if err := im.MapDocument(doc, input); err != nil {
		t.Error(err)
	}

	if produced := len(doc.Fields); produced > 0 {
		t.Errorf("expected no fields, got %d", produced)
	}
}
// TestInvalidFieldMappingStrict checks that an unknown key in a field mapping
// is tolerated by default, but is reported as an error once MappingJSONStrict
// is enabled, while the valid keys are still applied.
func TestInvalidFieldMappingStrict(t *testing.T) {
	mappingBytes := []byte(`{"includeInAll":true,"name":"a parsed name"}`)

	// first unmarshal it without strict
	var fm FieldMapping
	err := json.Unmarshal(mappingBytes, &fm)
	if err != nil {
		t.Fatal(err)
	}

	if fm.Name != "a parsed name" {
		t.Fatalf("expect to find field mapping name 'a parsed name', got '%s'", fm.Name)
	}

	// reset
	fm.Name = ""

	// now enable strict
	MappingJSONStrict = true
	defer func() {
		MappingJSONStrict = false
	}()

	expectedInvalidKeys := []string{"includeInAll"}
	expectedErr := fmt.Errorf("field mapping contains invalid keys: %v", expectedInvalidKeys)
	err = json.Unmarshal(mappingBytes, &fm)
	// guard against a nil error so an unexpected success fails the test
	// cleanly instead of panicking on err.Error()
	if err == nil || err.Error() != expectedErr.Error() {
		t.Fatalf("expected err: %v, got err: %v", expectedErr, err)
	}

	if fm.Name != "a parsed name" {
		t.Fatalf("expect to find field mapping name 'a parsed name', got '%s'", fm.Name)
	}
}
// TestInvalidDocumentMappingStrict checks that an unknown key in a document
// mapping is tolerated by default, but is reported as an error once
// MappingJSONStrict is enabled, while the valid keys are still applied.
func TestInvalidDocumentMappingStrict(t *testing.T) {
	mappingBytes := []byte(`{"defaultAnalyzer":true,"enabled":false}`)

	// first unmarshal it without strict
	var dm DocumentMapping
	err := json.Unmarshal(mappingBytes, &dm)
	if err != nil {
		t.Fatal(err)
	}

	if dm.Enabled != false {
		t.Fatalf("expect to find document mapping enabled false, got '%t'", dm.Enabled)
	}

	// reset
	dm.Enabled = true

	// now enable strict
	MappingJSONStrict = true
	defer func() {
		MappingJSONStrict = false
	}()

	expectedInvalidKeys := []string{"defaultAnalyzer"}
	expectedErr := fmt.Errorf("document mapping contains invalid keys: %v", expectedInvalidKeys)
	err = json.Unmarshal(mappingBytes, &dm)
	// guard against a nil error so an unexpected success fails the test
	// cleanly instead of panicking on err.Error()
	if err == nil || err.Error() != expectedErr.Error() {
		t.Fatalf("expected err: %v, got err: %v", expectedErr, err)
	}

	if dm.Enabled != false {
		t.Fatalf("expect to find document mapping enabled false, got '%t'", dm.Enabled)
	}
}
// TestInvalidIndexMappingStrict checks that an unknown key in an index
// mapping is tolerated by default, but is reported as an error once
// MappingJSONStrict is enabled, while the valid keys are still applied.
func TestInvalidIndexMappingStrict(t *testing.T) {
	mappingBytes := []byte(`{"typeField":"type","default_field":"all"}`)

	// first unmarshal it without strict
	var im IndexMappingImpl
	err := json.Unmarshal(mappingBytes, &im)
	if err != nil {
		t.Fatal(err)
	}

	if im.DefaultField != "all" {
		t.Fatalf("expect to find index mapping default field 'all', got '%s'", im.DefaultField)
	}

	// reset
	im.DefaultField = "_all"

	// now enable strict
	MappingJSONStrict = true
	defer func() {
		MappingJSONStrict = false
	}()

	expectedInvalidKeys := []string{"typeField"}
	expectedErr := fmt.Errorf("index mapping contains invalid keys: %v", expectedInvalidKeys)
	err = json.Unmarshal(mappingBytes, &im)
	// guard against a nil error so an unexpected success fails the test
	// cleanly instead of panicking on err.Error()
	if err == nil || err.Error() != expectedErr.Error() {
		t.Fatalf("expected err: %v, got err: %v", expectedErr, err)
	}

	if im.DefaultField != "all" {
		t.Fatalf("expect to find index mapping default field 'all', got '%s'", im.DefaultField)
	}
}
// TestMappingBug353 maps a document against a default mapping that has two
// non-dynamic sub-document mappings ("Reviews" with one explicit field, and
// "Other" with none) and expects exactly two fields in the result.
func TestMappingBug353(t *testing.T) {
	const rawDoc = `{
"Reviews": [
{
"ReviewID": "RX16692001",
"Content": "Usually stay near the airport..."
}
],
"Other": {
"Inside": "text"
},
"Name": "The Inn at Baltimore White Marsh"
}`

	var decoded map[string]interface{}
	if err := json.Unmarshal([]byte(rawDoc), &decoded); err != nil {
		t.Fatal(err)
	}

	contentField := NewTextFieldMapping()
	contentField.Analyzer = "crazy"

	reviews := NewDocumentMapping()
	reviews.Dynamic = false
	reviews.AddFieldMappingsAt("Content", contentField)

	other := NewDocumentMapping()
	other.Dynamic = false

	im := NewIndexMapping()
	im.DefaultMapping.AddSubDocumentMapping("Reviews", reviews)
	im.DefaultMapping.AddSubDocumentMapping("Other", other)

	doc := document.NewDocument("x")
	if err := im.MapDocument(doc, decoded); err != nil {
		t.Fatal(err)
	}

	// expect doc has only 2 fields
	if len(doc.Fields) == 2 {
		return
	}
	t.Errorf("expected doc with 2 fields, got: %d", len(doc.Fields))
	for _, field := range doc.Fields {
		t.Logf("field named: %s", field.Name())
	}
}
// TestAnonymousStructFields checks how embedded (anonymous) struct fields are
// named in the mapped document, first without struct tags and then with a
// json tag on every embedded field.
func TestAnonymousStructFields(t *testing.T) {
	type Contact0 string

	type Contact1 struct {
		Name string
	}

	type Contact2 interface{}

	type Contact3 interface{}

	type Thing struct {
		Contact0
		Contact1
		Contact2
		Contact3
	}

	x := Thing{
		Contact0: "hello",
		Contact1: Contact1{
			Name: "marty",
		},
		Contact2: Contact1{
			Name: "will",
		},
		Contact3: "steve",
	}

	doc := document.NewDocument("1")
	m := NewIndexMapping()
	err := m.MapDocument(doc, x)
	if err != nil {
		t.Fatal(err)
	}

	// expected field names, in document order, for the untagged struct
	untaggedNames := []string{"Contact0", "Name", "Contact2.Name", "Contact3"}
	if len(doc.Fields) != 4 {
		t.Fatalf("expected 4 fields, got %d", len(doc.Fields))
	}
	for i, want := range untaggedNames {
		if got := doc.Fields[i].Name(); got != want {
			t.Errorf("expected field named '%s', got '%s'", want, got)
		}
	}

	type AnotherThing struct {
		Contact0 `json:"Alternate0"`
		Contact1 `json:"Alternate1"`
		Contact2 `json:"Alternate2"`
		Contact3 `json:"Alternate3"`
	}

	y := AnotherThing{
		Contact0: "hello",
		Contact1: Contact1{
			Name: "marty",
		},
		Contact2: Contact1{
			Name: "will",
		},
		Contact3: "steve",
	}

	doc2 := document.NewDocument("2")
	err = m.MapDocument(doc2, y)
	if err != nil {
		t.Fatal(err)
	}

	// expected field names, in document order, for the tagged struct; taking
	// the message from this list keeps the reported expectation in sync with
	// the value that is actually compared
	taggedNames := []string{"Alternate0", "Alternate1.Name", "Alternate2.Name", "Alternate3"}
	if len(doc2.Fields) != 4 {
		t.Fatalf("expected 4 fields, got %d", len(doc2.Fields))
	}
	for i, want := range taggedNames {
		if got := doc2.Fields[i].Name(); got != want {
			t.Errorf("expected field named '%s', got '%s'", want, got)
		}
	}
}
// TestAnonymousStructFieldWithJSONStructTagEmptString maps a struct whose
// embedded interface field carries an empty json name (`json:""`) and holds a
// map. The test expects exactly one field, named after the map key ("key").
func TestAnonymousStructFieldWithJSONStructTagEmptString(t *testing.T) {
	type InterfaceThing interface{}

	type Thing struct {
		InterfaceThing `json:""`
	}

	payload := map[string]interface{}{"key": "value"}
	src := Thing{InterfaceThing: payload}

	doc := document.NewDocument("1")
	idxMapping := NewIndexMapping()
	if err := idxMapping.MapDocument(doc, src); err != nil {
		t.Fatal(err)
	}

	// Stop immediately on a count mismatch; Fields[0] is read next.
	if count := len(doc.Fields); count != 1 {
		t.Fatalf("expected 1 field, got %d", count)
	}
	if name := doc.Fields[0].Name(); name != "key" {
		t.Errorf("expected field named 'key', got '%s'", name)
	}
}
// TestMappingPrimitives maps a bare primitive value (string, every sized
// signed/unsigned integer, both float widths, and bool) as the whole document
// and expects each one to produce exactly one field.
func TestMappingPrimitives(t *testing.T) {
	values := []interface{}{
		"marty",
		int(1),
		int8(2),
		int16(3),
		int32(4),
		int64(5),
		uint(6),
		uint8(7),
		uint16(8),
		uint32(9),
		uint64(10),
		float32(11.0),
		float64(12.0),
		false,
	}

	m := NewIndexMapping()
	for _, value := range values {
		doc := document.NewDocument("x")
		if err := m.MapDocument(doc, value); err != nil {
			t.Fatal(err)
		}
		if got := len(doc.Fields); got != 1 {
			t.Errorf("expected 1 field, got %d for %v", got, value)
		}
	}
}
// TestMappingForGeo maps documents whose "location" property is expressed in
// several different shapes (struct, map, slice, and strings wrapped in a
// slice) through a geopoint field mapping, and checks that each one yields a
// "location" geopoint field with the expected lon/lat (compared after rounding
// to two decimal places).
func TestMappingForGeo(t *testing.T) {
	type Location struct {
		Lat float64
		Lon float64
	}

	nameFieldMapping := NewTextFieldMapping()
	nameFieldMapping.Name = "name"
	nameFieldMapping.Analyzer = "standard"

	locFieldMapping := NewGeoPointFieldMapping()

	thingMapping := NewDocumentMapping()
	thingMapping.AddFieldMappingsAt("name", nameFieldMapping)
	thingMapping.AddFieldMappingsAt("location", locFieldMapping)

	mapping := NewIndexMapping()
	mapping.DefaultMapping = thingMapping

	// geopoints[i] is the input document, expect[i] the matching result.
	geopoints := []interface{}{}
	expect := [][]float64{} // to contain expected [lon,lat] for geopoints

	// geopoint as a struct
	geopoints = append(geopoints, struct {
		Name     string    `json:"name"`
		Location *Location `json:"location"`
	}{
		Name: "struct",
		Location: &Location{
			Lon: -180,
			Lat: -90,
		},
	})
	expect = append(expect, []float64{-180, -90})

	// geopoint as a map
	geopoints = append(geopoints, struct {
		Name     string                 `json:"name"`
		Location map[string]interface{} `json:"location"`
	}{
		Name: "map",
		Location: map[string]interface{}{
			"lon": -180,
			"lat": -90,
		},
	})
	expect = append(expect, []float64{-180, -90})

	// geopoint as a slice, format: {lon, lat}
	geopoints = append(geopoints, struct {
		Name     string        `json:"name"`
		Location []interface{} `json:"location"`
	}{
		Name: "slice",
		Location: []interface{}{
			-180, -90,
		},
	})
	expect = append(expect, []float64{-180, -90})

	// geopoint as a string inside a single-element slice, format: "lat,lon"
	geopoints = append(geopoints, struct {
		Name     string        `json:"name"`
		Location []interface{} `json:"location"`
	}{
		Name: "string",
		Location: []interface{}{
			"-90,-180",
		},
	})
	expect = append(expect, []float64{-180, -90})

	// geopoint as a string inside a single-element slice,
	// format: "lat , lon" with whitespace around the separator
	geopoints = append(geopoints, struct {
		Name     string        `json:"name"`
		Location []interface{} `json:"location"`
	}{
		Name: "string",
		Location: []interface{}{
			"-90 , -180",
		},
	})
	expect = append(expect, []float64{-180, -90})

	// geopoint as a geohash string inside a single-element slice
	geopoints = append(geopoints, struct {
		Name     string        `json:"name"`
		Location []interface{} `json:"location"`
	}{
		Name: "string",
		Location: []interface{}{
			"000000000000",
		},
	})
	expect = append(expect, []float64{-180, -90})

	// geopoint as a geohash string inside a single-element slice
	geopoints = append(geopoints, struct {
		Name     string        `json:"name"`
		Location []interface{} `json:"location"`
	}{
		Name: "string",
		Location: []interface{}{
			"drm3btev3e86",
		},
	})
	expect = append(expect, []float64{-71.34, 41.12})

	for i, geopoint := range geopoints {
		doc := document.NewDocument(fmt.Sprint(i))
		err := mapping.MapDocument(doc, geopoint)
		if err != nil {
			t.Fatal(err)
		}

		var foundGeo bool
		for _, f := range doc.Fields {
			if f.Name() == "location" {
				foundGeo = true
				geoF, ok := f.(index.GeoPointField)
				if !ok {
					// geoF is a nil interface when the assertion fails, so
					// calling Lon/Lat below would panic; abort instead.
					t.Fatalf("expected a geopoint field!")
				}
				lon, err := geoF.Lon()
				if err != nil {
					t.Errorf("error in fetching lon, err: %v", err)
				}
				lat, err := geoF.Lat()
				if err != nil {
					t.Errorf("error in fetching lat, err: %v", err)
				}
				// round obtained lon, lat to 2 decimal places
				roundLon, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lon), 64)
				roundLat, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lat), 64)
				if roundLon != expect[i][0] || roundLat != expect[i][1] {
					t.Errorf("expected geo point: {%v, %v}, got {%v, %v}",
						expect[i][0], expect[i][1], lon, lat)
				}
			}
		}

		if !foundGeo {
			t.Errorf("expected to find geo point, did not")
		}
	}
}
// textMarshalable is a test fixture with one unexported field (body) that is
// only exposed through MarshalText, and one exported field (Extra).
type textMarshalable struct {
	body  string
	Extra string
}

// MarshalText returns the bytes of body and never reports an error. Its
// signature matches encoding.TextMarshaler.
func (tm *textMarshalable) MarshalText() ([]byte, error) {
	text := []byte(tm.body)
	return text, nil
}
// TestMappingForTextMarshaler maps a struct holding a *textMarshalable under
// two index mappings:
//
//  1. a default mapping with no explicit field mapping: the test expects a
//     single field "Marshalable.Extra" carrying the Extra value;
//  2. a mapping that explicitly maps "Marshalable" as text: the test expects a
//     single field "Marshalable" whose value equals the MarshalText output.
func TestMappingForTextMarshaler(t *testing.T) {
	tm := struct {
		Marshalable *textMarshalable
	}{
		Marshalable: &textMarshalable{body: "text", Extra: "stuff"},
	}

	// Case 1: nothing maps the struct field as text, so the mapper walks
	// into the struct and indexes what it finds there.
	m := NewIndexMapping()
	doc := document.NewDocument("x")
	if err := m.MapDocument(doc, tm); err != nil {
		t.Fatal(err)
	}
	if n := len(doc.Fields); n != 1 {
		t.Fatalf("expected 1 field, got: %d", n)
	}
	if name := doc.Fields[0].Name(); name != "Marshalable.Extra" {
		t.Errorf("expected field to be named 'Marshalable.Extra', got: '%s'", name)
	}
	if got := string(doc.Fields[0].Value()); got != tm.Marshalable.Extra {
		t.Errorf("expected field value to be '%s', got: '%s'", tm.Marshalable.Extra, got)
	}

	// Case 2: the field is explicitly mapped as text, so the marshaled text
	// is expected to be indexed under the field's own name.
	m = NewIndexMapping()
	textMapping := NewTextFieldMapping()
	m.DefaultMapping.AddFieldMappingsAt("Marshalable", textMapping)
	doc = document.NewDocument("x")
	if err := m.MapDocument(doc, tm); err != nil {
		t.Fatal(err)
	}
	if n := len(doc.Fields); n != 1 {
		t.Fatalf("expected 1 field, got: %d", n)
	}
	if name := doc.Fields[0].Name(); name != "Marshalable" {
		t.Errorf("expected field to be named 'Marshalable', got: '%s'", name)
	}

	want, err := tm.Marshalable.MarshalText()
	if err != nil {
		t.Fatal(err)
	}
	if got := string(doc.Fields[0].Value()); got != string(want) {
		t.Errorf("expected field value to be '%s', got: '%s'", string(want), got)
	}
}
// TestMappingForNilTextMarshaler maps a struct whose *time.Time field is nil
// while that field is explicitly mapped as text, and expects the mapping to
// succeed without adding any field to the document.
func TestMappingForNilTextMarshaler(t *testing.T) {
	tm := struct {
		Marshalable *time.Time
	}{
		Marshalable: nil,
	}

	// Explicitly map the (nil) field as text.
	m := NewIndexMapping()
	txt := NewTextFieldMapping()
	m.DefaultMapping.AddFieldMappingsAt("Marshalable", txt)
	doc := document.NewDocument("x")
	err := m.MapDocument(doc, tm)
	if err != nil {
		t.Fatal(err)
	}

	// A nil pointer must not produce a field; the message states the count
	// that is actually being asserted.
	if len(doc.Fields) != 0 {
		t.Fatalf("expected 0 fields, got: %d", len(doc.Fields))
	}
}
// TestClosestDocDynamicMapping uses a static default mapping with dynamic
// indexing disabled and only "foo" mapped. A document that also contains a
// nested "bar" object (with its own "foo" and "baz") is expected to produce
// exactly one field.
func TestClosestDocDynamicMapping(t *testing.T) {
	idxMapping := NewIndexMapping()
	idxMapping.IndexDynamic = false
	idxMapping.DefaultMapping = NewDocumentStaticMapping()
	idxMapping.DefaultMapping.AddFieldMappingsAt("foo", NewTextFieldMapping())

	doc := document.NewDocument("x")

	nested := map[string]string{
		"foo": "value2",
		"baz": "value3",
	}
	src := map[string]interface{}{
		"foo": "value",
		"bar": nested,
	}

	if err := idxMapping.MapDocument(doc, src); err != nil {
		t.Fatal(err)
	}
	if count := len(doc.Fields); count != 1 {
		t.Fatalf("expected 1 field, got: %d", count)
	}
}
// TestMappingPointerToTimeBug1152 maps a struct with a non-nil *time.Time
// field that has an explicit, non-text (datetime) field mapping, and expects
// a single field implementing index.DateTimeField.
func TestMappingPointerToTimeBug1152(t *testing.T) {
	when, err := time.Parse(time.RFC3339, "2019-03-06T15:04:05Z")
	if err != nil {
		t.Fatal(err)
	}

	thing := struct {
		When *time.Time
	}{When: &when}

	// The problematic case was an explicit mapping whose type is NOT text,
	// so a datetime mapping is registered for the field.
	m := NewIndexMapping()
	dateTimeMapping := NewDateTimeFieldMapping()
	m.DefaultMapping.AddFieldMappingsAt("When", dateTimeMapping)

	doc := document.NewDocument("x")
	if err = m.MapDocument(doc, thing); err != nil {
		t.Fatal(err)
	}

	if count := len(doc.Fields); count != 1 {
		t.Fatalf("expected 1 field, got: %d", count)
	}

	_, isDateTime := doc.Fields[0].(index.DateTimeField)
	if !isDateTime {
		t.Fatalf("expected field to be type *document.DateTimeField, got %T", doc.Fields[0])
	}
}
// TestDefaultAnalyzerInheritance sets DefaultAnalyzer on a document mapping
// and expects defaultAnalyzerName to report that analyzer for a child text
// field that does not set one itself.
func TestDefaultAnalyzerInheritance(t *testing.T) {
	parent := NewDocumentMapping()
	parent.DefaultAnalyzer = "xyz"

	child := NewTextFieldMapping()
	parent.AddFieldMappingsAt("field", child)

	got := parent.defaultAnalyzerName([]string{"field"})
	if got != "xyz" {
		t.Fatalf("Expected analyzer: xyz to be inherited by field, but got: '%v'", got)
	}
}
// TestWrongAnalyzerSearchableAs registers a text field named "geo.accuracy"
// with analyzer "xyz" at property "accuracy" of a "geo" sub-document of the
// "brewery" type, then expects AnalyzerNameForPath("geo.geo.accuracy") to
// return "xyz".
func TestWrongAnalyzerSearchableAs(t *testing.T) {
	accuracy := NewTextFieldMapping()
	accuracy.Name = "geo.accuracy"
	accuracy.Analyzer = "xyz"

	geo := NewDocumentMapping()
	geo.AddFieldMappingsAt("accuracy", accuracy)

	brewery := NewDocumentMapping()
	brewery.AddSubDocumentMapping("geo", geo)

	idxMapping := NewIndexMapping()
	idxMapping.AddDocumentMapping("brewery", brewery)

	if got := idxMapping.AnalyzerNameForPath("geo.geo.accuracy"); got != "xyz" {
		t.Errorf("expected analyzer name `xyz`, got `%s`", got)
	}
}
// TestMappingArrayOfStringGeoPoints maps documents whose "points" property is
// an array of geopoints, expressed as "lat,lon" strings, as {lon, lat}
// slices, and as maps keyed by "lon"/"lng" and "lat". For every document each
// of the three expected points must be found among the produced "points"
// fields (compared after rounding to two decimal places).
func TestMappingArrayOfStringGeoPoints(t *testing.T) {
	// NOTE(review): nameFieldMapping is configured but never attached to
	// thingMapping in this test; kept as-is to avoid changing the setup.
	nameFieldMapping := NewTextFieldMapping()
	nameFieldMapping.Name = "name"
	nameFieldMapping.Analyzer = "standard"

	locFieldMapping := NewGeoPointFieldMapping()

	thingMapping := NewDocumentMapping()
	thingMapping.AddFieldMappingsAt("points", locFieldMapping)

	mapping := NewIndexMapping()
	mapping.DefaultMapping = thingMapping

	docs := []map[string]interface{}{
		{
			// string: "lat,lon"
			"points": []string{
				"1.0, 2.0",
				"3.0, 4.0",
				"5.0, 6.0",
			},
		},
		{
			// slice: {lon, lat}
			"points": [][]float64{
				{2.0, 1.0},
				{4.0, 3.0},
				{6.0, 5.0},
			},
		},
		{
			// struct: {"lon/lng": .., "lat": ..}
			"points": []map[string]interface{}{
				{"lon": 2.0, "lat": 1.0},
				{"lng": 4.0, "lat": 3.0},
				{"lng": 6.0, "lat": 5.0},
			},
		},
	}

	for _, docSrc := range docs {
		doc := document.NewDocument("x")
		err := mapping.MapDocument(doc, docSrc)
		if err != nil {
			t.Fatal(err)
		}

		// points here in lon, lat order; entries are removed as they are
		// matched, so anything left at the end was not found.
		expectPoints := map[string][]float64{
			"first":  {2.0, 1.0},
			"second": {4.0, 3.0},
			"third":  {6.0, 5.0},
		}

		for _, f := range doc.Fields {
			if f.Name() == "points" {
				geoF, ok := f.(*document.GeoPointField)
				if !ok {
					// geoF is a nil pointer when the assertion fails;
					// abort rather than call methods on it.
					t.Fatalf("expected a geopoint field!")
				}
				lon, err := geoF.Lon()
				if err != nil {
					t.Errorf("error in fetching lon, err: %v", err)
				}
				lat, err := geoF.Lat()
				if err != nil {
					t.Errorf("error in fetching lat, err: %v", err)
				}
				// round obtained lon, lat to 2 decimal places
				roundLon, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lon), 64)
				roundLat, _ := strconv.ParseFloat(fmt.Sprintf("%.2f", lat), 64)

				for key, point := range expectPoints {
					if roundLon == point[0] && roundLat == point[1] {
						delete(expectPoints, key)
					}
				}
			}
		}

		if len(expectPoints) > 0 {
			t.Errorf("some points not found: %v", expectPoints)
		}
	}
}
================================================
FILE: mapping/mapping_vectors.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package mapping
import (
"fmt"
"reflect"
"slices"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
faiss "github.com/blevesearch/go-faiss"
)
// MinVectorDims and MaxVectorDims are the inclusive lower and upper bounds on
// the number of dimensions allowed for a vector field. validateVectorFieldAlias
// rejects a vector field mapping whose Dims falls outside this range.
//
// These must be set/updated at process init() only.
var (
	MinVectorDims = 1
	MaxVectorDims = 4096
)
// NewVectorFieldMapping returns a new field mapping of type "vector" that is
// indexed and skips freq/norm, and is not stored, not included in "_all", and
// has doc values disabled.
func NewVectorFieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "vector"
	fm.Store = false
	fm.Index = true
	fm.IncludeInAll = false
	fm.DocValues = false
	fm.SkipFreqNorm = true
	return fm
}
// NewVectorBase64FieldMapping returns a new field mapping of type
// "vector_base64" that is indexed and skips freq/norm, and is not stored, not
// included in "_all", and has doc values disabled.
func NewVectorBase64FieldMapping() *FieldMapping {
	fm := new(FieldMapping)
	fm.Type = "vector_base64"
	fm.Store = false
	fm.Index = true
	fm.IncludeInAll = false
	fm.DocValues = false
	fm.SkipFreqNorm = true
	return fm
}
// processFlatVector converts a flat (non-nested) reflected vector into a
// []float32 of exactly dims elements.
//
// It returns (nil, false) when the length of vecV differs from dims, when an
// element cannot be read through Interface(), or when an element is not
// accepted by util.ExtractNumericValFloat32.
func processFlatVector(vecV reflect.Value, dims int) ([]float32, bool) {
	length := vecV.Len()
	if length != dims {
		return nil, false
	}

	flat := make([]float32, dims)
	for idx := 0; idx < length; idx++ {
		elem := vecV.Index(idx)
		if !elem.CanInterface() {
			return nil, false
		}

		value, isNumeric := util.ExtractNumericValFloat32(elem.Interface())
		if !isNumeric {
			return nil, false
		}
		flat[idx] = value
	}

	return flat, true
}
// processVector validates vecI and converts it into a flat []float32.
//
// vecI must be a non-empty slice. Whether it is flat or nested is decided by
// its first element: if that element is not a slice, the whole value is
// handled as one flat vector of dims elements; otherwise every element must
// be a slice of dims numeric values, and the sub-vectors are concatenated in
// order. The maximum supported nesting depth is 2 (e.g. [][]float32).
//
// It returns (nil, false) for any input that does not meet these rules.
func processVector(vecI interface{}, dims int) ([]float32, bool) {
	outer := reflect.ValueOf(vecI)
	if !outer.IsValid() {
		return nil, false
	}
	if outer.Kind() != reflect.Slice || outer.Len() == 0 {
		return nil, false
	}

	// unwrap re-reflects an element through Interface() so that elements of
	// interface type (e.g. in a []interface{}) expose their dynamic value.
	unwrap := func(elem reflect.Value) (reflect.Value, bool) {
		if !elem.CanInterface() {
			return reflect.Value{}, false
		}
		inner := reflect.ValueOf(elem.Interface())
		return inner, inner.IsValid()
	}

	first, ok := unwrap(outer.Index(0))
	if !ok {
		return nil, false
	}
	if first.Kind() != reflect.Slice {
		// first element is a scalar: treat the input as a flat vector
		return processFlatVector(outer, dims)
	}

	// Nested vector: allocate the flattened result up front so each
	// sub-vector can be copied straight into its slot.
	count := outer.Len()
	flattened := make([]float32, dims*count)
	for i := 0; i < count; i++ {
		row, ok := unwrap(outer.Index(i))
		if !ok || row.Kind() != reflect.Slice {
			return nil, false
		}

		rowValues, ok := processFlatVector(row, dims)
		if !ok {
			return nil, false
		}
		copy(flattened[i*dims:(i+1)*dims], rowValues)
	}

	return flattened, true
}
// processVector tries to interpret propertyMightBeVector as a vector (flat or
// nested) of fm.Dims dimensions and, on success, adds a vector field for it
// to the document held by context.
//
// The similarity metric and index optimization default to
// index.DefaultVectorSimilarityMetric and index.DefaultIndexOptimization when
// unset on the mapping. The field name is also appended to
// context.excludedFromAll.
//
// It returns false, adding nothing, when the value is not a valid vector, and
// true once the field has been added.
func (fm *FieldMapping) processVector(propertyMightBeVector interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) bool {
	vec, valid := processVector(propertyMightBeVector, fm.Dims)
	if !valid {
		// invalid vectors are silently skipped
		return false
	}

	// Fall back to the defaults when the mapping leaves these unset.
	metric := fm.Similarity
	if metric == "" {
		metric = index.DefaultVectorSimilarityMetric
	}
	optimizedFor := fm.VectorIndexOptimizedFor
	if optimizedFor == "" {
		optimizedFor = index.DefaultIndexOptimization
	}

	// bivf indexes only support hamming distance for the primary binary
	// index; the similarity here applies to the backing flat index, which is
	// forced to cosine for recall reasons.
	if index.OptimizationRequiresBinaryIndex(optimizedFor) {
		metric = index.CosineSimilarity
	}

	// For cosine similarity the raw vector is normalized. The value may be a
	// flattened multi-vector, so each sub-vector of fm.Dims elements is
	// normalized independently.
	if metric == index.CosineSimilarity {
		vec = NormalizeMultiVector(vec, fm.Dims)
	}

	name := getFieldName(pathString, path, fm)
	opts := fm.Options()
	context.doc.AddField(document.NewVectorFieldWithIndexingOptions(
		name, indexes, vec, fm.Dims, metric, optimizedFor, opts))

	// the "_all" composite field is not applicable to vector fields
	context.excludedFromAll = append(context.excludedFromAll, name)

	return true
}
// processVectorBase64 tries to interpret propertyMightBeVectorBase64 as a
// string holding an encoded vector and, on success, adds a vector field for
// it to the document held by context.
//
// Nothing is added when the value is not a string, when
// document.DecodeVector fails, or when the decoded vector does not have
// exactly fm.Dims elements. The similarity metric and index optimization
// default to index.DefaultVectorSimilarityMetric and
// index.DefaultIndexOptimization when unset on the mapping. The field name is
// also appended to context.excludedFromAll.
func (fm *FieldMapping) processVectorBase64(propertyMightBeVectorBase64 interface{},
	pathString string, path []string, indexes []uint64, context *walkContext) {
	encoded, isString := propertyMightBeVectorBase64.(string)
	if !isString {
		return
	}

	// Fall back to the defaults when the mapping leaves these unset.
	metric := fm.Similarity
	if metric == "" {
		metric = index.DefaultVectorSimilarityMetric
	}
	optimizedFor := fm.VectorIndexOptimizedFor
	if optimizedFor == "" {
		optimizedFor = index.DefaultIndexOptimization
	}

	// bivf indexes only support hamming distance for the primary binary
	// index; the similarity here applies to the backing flat index, which is
	// forced to cosine for recall reasons.
	if index.OptimizationRequiresBinaryIndex(optimizedFor) {
		metric = index.CosineSimilarity
	}

	vec, err := document.DecodeVector(encoded)
	if err != nil {
		return
	}
	if len(vec) != fm.Dims {
		return
	}

	// Multi-vectors are not supported for base64 input, so a cosine metric
	// normalizes the decoded vector as a single vector.
	if metric == index.CosineSimilarity {
		vec = NormalizeVector(vec)
	}

	name := getFieldName(pathString, path, fm)
	opts := fm.Options()
	context.doc.AddField(document.NewVectorFieldWithIndexingOptions(
		name, indexes, vec, fm.Dims, metric, optimizedFor, opts))

	// the "_all" composite field is not applicable to vector_base64 fields
	context.excludedFromAll = append(context.excludedFromAll, name)
}
// -----------------------------------------------------------------------------
// document validation functions
// validateFieldMapping validates a single field mapping. Fields of type
// "vector" or "vector_base64" go through validateVectorFieldAlias with the
// given path and alias context; every other field is checked by
// validateFieldType.
func validateFieldMapping(field *FieldMapping, path []string,
	fieldAliasCtx map[string]*FieldMapping) error {
	isVector := field.Type == "vector" || field.Type == "vector_base64"
	if !isVector {
		return validateFieldType(field)
	}
	return validateVectorFieldAlias(field, path, fieldAliasCtx)
}
// validateVectorFieldAlias validates a vector field mapping located at path.
//
// The field's effective name comes from getFieldName; unset similarity and
// optimization values are replaced by the index package defaults before any
// comparison.
//
// If fieldAliasCtx already holds a field under the same effective name, this
// field is treated as an alias of it: its dimensions, similarity and index
// optimization must match the recorded field, and no further checks run.
//
// Otherwise the field's own options are validated (dimension range, supported
// similarity metric, supported optimization, and a multiple-of-8 dimension
// when the optimization requires a binary index) and, if fieldAliasCtx is
// non-nil, the field is recorded under its effective name.
func validateVectorFieldAlias(field *FieldMapping, path []string,
	fieldAliasCtx map[string]*FieldMapping) error {
	// effective name: the field's own name if set, else derived from path
	name := getFieldName(encodePath(path), path, field)

	similarity := field.Similarity
	if similarity == "" {
		similarity = index.DefaultVectorSimilarityMetric
	}
	optimizedFor := field.VectorIndexOptimizedFor
	if optimizedFor == "" {
		optimizedFor = index.DefaultIndexOptimization
	}

	// Alias case: a field with this name was seen before. Reading from a nil
	// map is safe, so no nil check is needed here.
	if prior, aliased := fieldAliasCtx[name]; aliased {
		if field.Dims != prior.Dims {
			return fmt.Errorf(
				"field: '%s', invalid alias (different dimensions %d and %d)",
				name, field.Dims, prior.Dims)
		}

		priorSimilarity := prior.Similarity
		if priorSimilarity == "" {
			priorSimilarity = index.DefaultVectorSimilarityMetric
		}
		if similarity != priorSimilarity {
			return fmt.Errorf(
				"field: '%s', invalid alias (different similarity values %s and %s)",
				name, similarity, priorSimilarity)
		}

		priorOptimizedFor := prior.VectorIndexOptimizedFor
		if priorOptimizedFor == "" {
			priorOptimizedFor = index.DefaultIndexOptimization
		}
		if optimizedFor != priorOptimizedFor {
			return fmt.Errorf(
				"field: '%s', invalid alias (different vector index optimization values %s and %s)",
				name, optimizedFor, priorOptimizedFor)
		}

		return nil
	}

	// First occurrence of this name: validate the field's own options.

	// dimensions must lie within [MinVectorDims, MaxVectorDims]
	if field.Dims < MinVectorDims || field.Dims > MaxVectorDims {
		return fmt.Errorf(
			"field: '%s', invalid vector dimension: %d, value should be in range [%d, %d]",
			name, field.Dims, MinVectorDims, MaxVectorDims)
	}

	// similarity metric must be one of the supported ones
	if _, supported := index.SupportedVectorSimilarityMetrics[similarity]; !supported {
		validMetrics := reflect.ValueOf(index.SupportedVectorSimilarityMetrics).MapKeys()
		return fmt.Errorf(
			"field: '%s', invalid similarity metric: '%s', valid metrics are: %+v",
			name, similarity, validMetrics)
	}

	// index optimization must be one of the supported ones
	if _, supported := index.SupportedVectorIndexOptimizations[optimizedFor]; !supported {
		validOptimizations := reflect.ValueOf(index.SupportedVectorIndexOptimizations).MapKeys()
		return fmt.Errorf(
			"field: '%s', invalid vector index optimization: '%s', valid optimizations are: %+v",
			name, optimizedFor, validOptimizations)
	}

	// bivf indexes require the dimensionality to be a multiple of 8
	if index.OptimizationRequiresBinaryIndex(optimizedFor) && field.Dims%8 != 0 {
		return fmt.Errorf(
			"field: '%s', incompatible vector dimensionality for BIVF: %d, dimension should be a multiple of 8",
			name, field.Dims)
	}

	// Record the field for later alias checks; writing to a nil map would
	// panic, hence the guard.
	if fieldAliasCtx != nil {
		fieldAliasCtx[name] = field
	}

	return nil
}
// NormalizeVector normalizes a single vector to unit length.
// The input is cloned first and the clone is handed to faiss.NormalizeVector
// (which normalizes in place), so the caller's slice is left untouched; the
// value returned by faiss.NormalizeVector is returned.
func NormalizeVector(vec []float32) []float32 {
	return faiss.NormalizeVector(slices.Clone(vec))
}
// NormalizeMultiVector normalizes each consecutive sub-vector of `dims`
// elements in vec independently to unit length.
//
// When vec is empty, dims is not positive, or len(vec) is not a multiple of
// dims, the input slice itself is returned unchanged (no copy is made). A
// vec of exactly dims elements is delegated to NormalizeVector. Otherwise a
// copy of vec is normalized sub-vector by sub-vector and returned, leaving
// the input unmodified.
func NormalizeMultiVector(vec []float32, dims int) []float32 {
	total := len(vec)
	if total == 0 || dims <= 0 {
		return vec
	}
	if total%dims != 0 {
		return vec
	}

	// exactly one vector: reuse the single-vector path
	if total == dims {
		return NormalizeVector(vec)
	}

	// several vectors: work on a copy and normalize each window in place
	normalized := slices.Clone(vec)
	for start, end := 0, dims; start < total; start, end = end, end+dims {
		faiss.NormalizeVector(normalized[start:end])
	}

	return normalized
}
================================================
FILE: mapping/mapping_vectors_test.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package mapping
import (
"math"
"reflect"
"strings"
"testing"
)
func TestVectorFieldAliasValidation(t *testing.T) {
tests := []struct {
// input
name string // name of the test
mappingStr string // index mapping json string
// expected output
expValidity bool // validity of the mapping
errMsgs []string // error message, given expValidity is false
}{
{
name: "test1",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3
},
{
"name": "cityVec",
"type": "vector",
"dims": 4
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'cityVec', invalid alias (different dimensions 4 and 3)`},
},
{
name: "test2",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3,
"similarity": "l2_norm"
},
{
"name": "cityVec",
"type": "vector",
"dims": 3,
"similarity": "dot_product"
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'cityVec', invalid alias (different similarity values dot_product and l2_norm)`},
},
{
name: "test3",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3
},
{
"name": "cityVec",
"type": "vector",
"dims": 3
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
{
name: "test4",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 4
}
]
},
"countryVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 3
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'vecData', invalid alias (different dimensions 3 and 4)`, `field: 'vecData', invalid alias (different dimensions 4 and 3)`},
},
{
name: "test5",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 3
}
]
}
}
},
"types": {
"type1": {
"properties": {
"cityVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 4
}
]
}
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'vecData', invalid alias (different dimensions 4 and 3)`},
},
// Test 6: Different vector index optimization values (alias case)
{
name: "different_optimization_alias",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3,
"vector_index_optimized_for": "recall"
},
{
"name": "cityVec",
"type": "vector",
"dims": 3,
"vector_index_optimized_for": "latency"
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'cityVec', invalid alias (different vector index optimization values latency and recall)`},
},
// Test 7: Invalid dimensions - below minimum
{
name: "dims_below_minimum",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 0
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'cityVec', invalid vector dimension: 0, value should be in range [1, 4096]`},
},
// Test 8: Invalid dimensions - above maximum
{
name: "dims_above_maximum",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 5000
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'cityVec', invalid vector dimension: 5000, value should be in range [1, 4096]`},
},
// Test 9: Invalid similarity metric
{
name: "invalid_similarity_metric",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3,
"similarity": "invalid_metric"
}
]
}
}
}
}`,
expValidity: false,
// Note: error message contains map keys which have non-deterministic order
errMsgs: []string{`invalid similarity metric: 'invalid_metric'`},
},
// Test 10: Invalid vector index optimization
{
name: "invalid_optimization",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3,
"vector_index_optimized_for": "invalid_opt"
}
]
}
}
}
}`,
expValidity: false,
// Note: error message contains map keys which have non-deterministic order
errMsgs: []string{`invalid vector index optimization: 'invalid_opt'`},
},
// Test 11: vector_base64 type with valid dimensions
{
name: "vector_base64_valid",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector_base64",
"dims": 128
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 12: vector_base64 alias with different dimensions
{
name: "vector_base64_different_dims_alias",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector_base64",
"dims": 128
},
{
"name": "cityVec",
"type": "vector_base64",
"dims": 256
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{`field: 'cityVec', invalid alias (different dimensions 256 and 128)`},
},
// Test 13: Default similarity matching explicit similarity in alias
{
name: "default_similarity_matches_explicit",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3
},
{
"name": "cityVec",
"type": "vector",
"dims": 3,
"similarity": "l2_norm"
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 14: Default optimization matching explicit optimization in alias
{
name: "default_optimization_matches_explicit",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 3
},
{
"name": "cityVec",
"type": "vector",
"dims": 3,
"vector_index_optimized_for": "recall"
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 15: Valid alias with all explicit matching values
{
name: "valid_alias_all_explicit_matching",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"type": "vector",
"dims": 64,
"similarity": "dot_product",
"vector_index_optimized_for": "latency"
},
{
"name": "cityVec",
"type": "vector",
"dims": 64,
"similarity": "dot_product",
"vector_index_optimized_for": "latency"
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 16: Cross-property alias with different similarity
{
name: "cross_property_different_similarity",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 3,
"similarity": "cosine"
}
]
},
"countryVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 3,
"similarity": "l2_norm"
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{
`field: 'vecData', invalid alias (different similarity values l2_norm and cosine)`,
`field: 'vecData', invalid alias (different similarity values cosine and l2_norm)`,
},
},
// Test 17: Cross-property alias with different optimization
{
name: "cross_property_different_optimization",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 3,
"vector_index_optimized_for": "recall"
}
]
},
"countryVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 3,
"vector_index_optimized_for": "memory-efficient"
}
]
}
}
}
}`,
expValidity: false,
errMsgs: []string{
`field: 'vecData', invalid alias (different vector index optimization values memory-efficient and recall)`,
`field: 'vecData', invalid alias (different vector index optimization values recall and memory-efficient)`,
},
},
// Test 18: Valid cross-property alias with matching values
{
name: "valid_cross_property_alias",
mappingStr: `
{
"default_mapping": {
"properties": {
"cityVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 64,
"similarity": "dot_product",
"vector_index_optimized_for": "latency"
}
]
},
"countryVec": {
"fields": [
{
"name": "vecData",
"type": "vector",
"dims": 64,
"similarity": "dot_product",
"vector_index_optimized_for": "latency"
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 20: Different fully qualified paths - a.b.c.f vs f (different effective names, no conflict)
{
name: "different_fq_paths_no_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"a": {
"properties": {
"b": {
"properties": {
"c": {
"fields": [
{
"name": "f",
"type": "vector",
"dims": 64
}
]
}
}
}
}
},
"x": {
"fields": [
{
"name": "f",
"type": "vector",
"dims": 128
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 21: Same leaf property name at different paths (a.b.vec vs x.y.vec) - no conflict
{
name: "same_leaf_different_paths_no_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"a": {
"properties": {
"b": {
"properties": {
"vec": {
"fields": [
{
"type": "vector",
"dims": 64
}
]
}
}
}
}
},
"x": {
"properties": {
"y": {
"properties": {
"vec": {
"fields": [
{
"type": "vector",
"dims": 128
}
]
}
}
}
}
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 22: Field name override creates same effective name - alias conflict
// a.b with name "data" → effective "a.data"
// a with name "data" → effective "data"
// These are different, so no conflict
{
name: "field_name_override_different_parents_no_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"a": {
"properties": {
"b": {
"fields": [
{
"name": "data",
"type": "vector",
"dims": 64
}
]
}
}
},
"a2": {
"fields": [
{
"name": "data",
"type": "vector",
"dims": 128
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 23: Same effective field name via name override - should conflict
// a.b with name "sharedVec" → effective "a.sharedVec"
// a.c with name "sharedVec" → effective "a.sharedVec"
// Both resolve to same effective name with different dims → conflict
{
name: "same_effective_name_via_override_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"a": {
"properties": {
"b": {
"fields": [
{
"name": "sharedVec",
"type": "vector",
"dims": 64
}
]
},
"c": {
"fields": [
{
"name": "sharedVec",
"type": "vector",
"dims": 128
}
]
}
}
}
}
}
}`,
expValidity: false,
errMsgs: []string{
`field: 'a.sharedVec', invalid alias (different dimensions 128 and 64)`,
`field: 'a.sharedVec', invalid alias (different dimensions 64 and 128)`,
},
},
// Test 24: Deep nesting with same effective name via name override - should conflict
// level1.level2.propA with name "vec" → effective "level1.level2.vec"
// level1.level2.propB with name "vec" → effective "level1.level2.vec"
{
name: "deep_nesting_same_effective_name_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"level1": {
"properties": {
"level2": {
"properties": {
"propA": {
"fields": [
{
"name": "vec",
"type": "vector",
"dims": 64
}
]
},
"propB": {
"fields": [
{
"name": "vec",
"type": "vector",
"dims": 128
}
]
}
}
}
}
}
}
}
}`,
expValidity: false,
errMsgs: []string{
`field: 'level1.level2.vec', invalid alias (different dimensions 128 and 64)`,
`field: 'level1.level2.vec', invalid alias (different dimensions 64 and 128)`,
},
},
// Test 25: Root level field vs nested field with same name - no conflict
// Root: "embedding" → effective "embedding"
// Nested: a.b.embedding → effective "a.b.embedding"
{
name: "root_vs_nested_same_name_no_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"embedding": {
"fields": [
{
"type": "vector",
"dims": 64
}
]
},
"nested": {
"properties": {
"deep": {
"properties": {
"embedding": {
"fields": [
{
"type": "vector",
"dims": 256
}
]
}
}
}
}
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 26: Multiple levels with name override targeting same effective path
// a.b.x with name "target" → effective "a.b.target"
// a.b.target (no override) → effective "a.b.target"
// Same effective name, different dims → conflict
{
name: "name_override_matches_sibling_path_conflict",
mappingStr: `
{
"default_mapping": {
"properties": {
"a": {
"properties": {
"b": {
"properties": {
"x": {
"fields": [
{
"name": "target",
"type": "vector",
"dims": 64
}
]
},
"target": {
"fields": [
{
"type": "vector",
"dims": 128
}
]
}
}
}
}
}
}
}
}`,
expValidity: false,
errMsgs: []string{
`field: 'a.b.target', invalid alias (different dimensions 128 and 64)`,
`field: 'a.b.target', invalid alias (different dimensions 64 and 128)`,
},
},
// Test 27: Valid alias at deep nesting level
{
name: "valid_alias_deep_nesting",
mappingStr: `
{
"default_mapping": {
"properties": {
"a": {
"properties": {
"b": {
"properties": {
"c": {
"properties": {
"vec": {
"fields": [
{
"type": "vector",
"dims": 128,
"similarity": "dot_product"
},
{
"name": "vec",
"type": "vector",
"dims": 128,
"similarity": "dot_product"
}
]
}
}
}
}
}
}
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
// Test 28: Same field name override at different paths - different effective names
// vectors.vec with name "vec" → effective "vectors.vec"
// vec with name "vec" → effective "vec"
// Different effective names, so no conflict
{
name: "valid_alias_different_paths_same_field_name",
mappingStr: `
{
"default_mapping": {
"dynamic": false,
"enabled": true,
"properties": {
"vectors": {
"dynamic": true,
"enabled": true,
"properties": {
"vec": {
"enabled": true,
"dynamic": false,
"fields": [
{
"dims": 3,
"index": true,
"name": "vec",
"type": "vector"
}
]
}
}
},
"vec": {
"enabled": true,
"dynamic": false,
"fields": [
{
"dims": 3,
"index": true,
"name": "vec",
"similarity": "l2_norm",
"type": "vector",
"vector_index_optimized_for": "recall"
}
]
}
}
}
}`,
expValidity: true,
errMsgs: []string{},
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
im := NewIndexMapping()
err := im.UnmarshalJSON([]byte(test.mappingStr))
if err != nil {
t.Fatalf("failed to unmarshal index mapping: %v", err)
}
err = im.Validate()
isValid := err == nil
if test.expValidity != isValid {
t.Fatalf("validity mismatch, expected: %v, got: %v",
test.expValidity, isValid)
}
if !isValid {
errStringMatched := false
for _, possibleErrMsg := range test.errMsgs {
// Use Contains for matching since some error messages include
// map keys which have non-deterministic ordering
if err.Error() == possibleErrMsg || strings.Contains(err.Error(), possibleErrMsg) {
errStringMatched = true
break
}
}
if !errStringMatched {
t.Fatalf("invalid error message, expected one of: %v, got: %v",
test.errMsgs, err.Error())
}
}
})
}
}
// vectorTest is a single table-driven test case for the processVector
// function: an input vector, the dimensionality it is checked against, and
// the expected outcome.
type vectorTest struct {
// Input
ipVec interface{} // input vector
dims int // dimensionality of input vector
// Expected Output
expValidity bool // expected validity of the input
expOpVec []float32 // expected output vector, given the input is valid
}
func TestProcessVector(t *testing.T) {
// Note: while creating vectors, we are using []any instead of []float32,
// this is done to enhance our test coverage.
// When we unmarshal a vector from a JSON, we get []any, not []float32.
tests := []vectorTest{
// # Flat vectors
// ## numeric cases
// (all numeric elements)
{[]any{1, 2.2, 3}, 3, true, []float32{1, 2.2, 3}}, // len==dims
{[]any{1, 2.2, 3}, 2, false, nil}, // len>dims
{[]any{1, 2.2, 3}, 4, false, nil}, // lendims
{[]any{[]any{1, 2, 3}}, 2, false, nil}, // len 0 && len(tt.input) > 0 && len(tt.input)%tt.dims == 0 {
numVecs := len(result) / tt.dims
for i := 0; i < numVecs; i++ {
subVec := result[i*tt.dims : (i+1)*tt.dims]
mag := magnitude(subVec)
// Allow for zero vectors (magnitude 0) or unit vectors (magnitude 1)
if mag > 1e-6 && !floatApproxEqual(mag, 1.0, 1e-5) {
t.Errorf("sub-vector %d has magnitude %v, expected 1.0", i, mag)
}
}
}
})
}
}
// magnitude returns the Euclidean (L2) norm of v. The sum of squares is
// accumulated in float32; only the square root is taken in float64.
func magnitude(v []float32) float32 {
	var sumSquares float32
	for i := range v {
		sumSquares += v[i] * v[i]
	}
	return float32(math.Sqrt(float64(sumSquares)))
}
// floatApproxEqual reports whether a and b differ by strictly less than
// epsilon.
func floatApproxEqual(a, b, epsilon float32) bool {
	delta := a - b
	if delta < 0 {
		return -delta < epsilon
	}
	return delta < epsilon
}
================================================
FILE: mapping/reflect.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"reflect"
"strings"
)
// lookupPropertyPath walks data along path, one decodePath element at a
// time, resolving each element with lookupPropertyPathPart.
//
// It returns the value reached after the last element, or nil as soon as any
// element cannot be resolved.
func lookupPropertyPath(data interface{}, path string) interface{} {
	node := data
	for _, element := range decodePath(path) {
		if node = lookupPropertyPathPart(node, element); node == nil {
			return nil
		}
	}
	return node
}
// lookupPropertyPathPart resolves a single path element against data using
// reflection.
//
// Supported containers:
//   - maps whose key kind is string: part is used as the key
//   - structs: part is used as the field name; fields that cannot be
//     obtained through Interface (unexported ones) are ignored
//   - pointers: the pointer is dereferenced and the lookup is retried
//
// It returns nil when data is nil, is of any other kind, or has no matching
// entry or field.
func lookupPropertyPathPart(data interface{}, part string) interface{} {
	val := reflect.ValueOf(data)
	if !val.IsValid() {
		return nil
	}
	typ := val.Type()
	switch typ.Kind() {
	case reflect.Map:
		// FIXME can add support for other map keys in the future
		if typ.Key().Kind() == reflect.String {
			// Convert to the map's own key type: MapIndex panics when a
			// plain string is used against a map keyed by a defined string
			// type (e.g. `type Key string`), which the Kind check admits.
			key := reflect.ValueOf(part).Convert(typ.Key())
			entry := val.MapIndex(key)
			if entry.IsValid() {
				return entry.Interface()
			}
		}
	case reflect.Struct:
		field := val.FieldByName(part)
		if field.IsValid() && field.CanInterface() {
			return field.Interface()
		}
	case reflect.Ptr:
		// Elem of a nil pointer is the zero Value, which fails IsValid.
		ptrElem := val.Elem()
		if ptrElem.IsValid() && ptrElem.CanInterface() {
			return lookupPropertyPathPart(ptrElem.Interface(), part)
		}
	}
	return nil
}
// pathSeparator is the delimiter placed between the elements of a property
// path by encodePath and split on by decodePath.
const pathSeparator = "."
// decodePath splits path into its elements at every pathSeparator.
func decodePath(path string) []string {
	elements := strings.Split(path, pathSeparator)
	return elements
}
// encodePath joins pathElements into a single path string, placing
// pathSeparator between consecutive elements.
func encodePath(pathElements []string) string {
	joined := strings.Join(pathElements, pathSeparator)
	return joined
}
// mustString returns data as a string together with true when data holds a
// string; for anything else, including nil, it returns "" and false.
func mustString(data interface{}) (string, bool) {
	if s, isString := data.(string); isString {
		return s, true
	}
	return "", false
}
// parseTagName returns the part of a struct tag value that precedes the first
// comma (the field name). A tag without a comma is returned unchanged.
func parseTagName(tag string) string {
	name, _, _ := strings.Cut(tag, ",")
	return name
}
================================================
FILE: mapping/reflect_test.go
================================================
package mapping
import (
"reflect"
"testing"
)
// TestLookupPropertyPath checks that lookupPropertyPath resolves a
// single-element path against a map, a struct value and a pointer to a
// struct.
func TestLookupPropertyPath(t *testing.T) {
	type lookupCase struct {
		input  interface{}
		path   string
		output interface{}
	}
	cases := []lookupCase{
		{
			input:  map[string]interface{}{"Type": "a"},
			path:   "Type",
			output: "a",
		},
		{
			input:  struct{ Type string }{Type: "b"},
			path:   "Type",
			output: "b",
		},
		{
			input:  &struct{ Type string }{Type: "b"},
			path:   "Type",
			output: "b",
		},
	}

	for i := range cases {
		tc := cases[i]
		got := lookupPropertyPath(tc.input, tc.path)
		if reflect.DeepEqual(got, tc.output) {
			continue
		}
		t.Fatalf("expected '%v', got '%v', for path '%s' in %+v", tc.output, got, tc.path, tc.input)
	}
}
================================================
FILE: mapping/synonym.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package mapping
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/registry"
)
// SynonymSource pairs the name of a synonym collection with the name of an
// analyzer. It is the value built by NewSynonymSource and returned by
// SynonymSourceConstructor.
type SynonymSource struct {
// CollectionName is serialized under the "collection" JSON key.
CollectionName string `json:"collection"`
// AnalyzerName is serialized under the "analyzer" JSON key.
AnalyzerName string `json:"analyzer"`
}
// NewSynonymSource returns a new SynonymSource holding the given collection
// and analyzer names.
func NewSynonymSource(collection, analyzer string) *SynonymSource {
	src := new(SynonymSource)
	src.CollectionName = collection
	src.AnalyzerName = analyzer
	return src
}
// Collection returns the synonym collection name (the CollectionName field).
func (s *SynonymSource) Collection() string {
return s.CollectionName
}
// Analyzer returns the analyzer name (the AnalyzerName field).
func (s *SynonymSource) Analyzer() string {
return s.AnalyzerName
}
// SetCollection replaces the synonym collection name with c.
func (s *SynonymSource) SetCollection(c string) {
s.CollectionName = c
}
// SetAnalyzer replaces the analyzer name with a.
func (s *SynonymSource) SetAnalyzer(a string) {
s.AnalyzerName = a
}
// SynonymSourceConstructor builds a SynonymSource from a generic config map.
//
// config must hold string values under the "collection" and "analyzer" keys,
// and the named analyzer must be resolvable through cache; otherwise an error
// is returned.
func SynonymSourceConstructor(config map[string]interface{}, cache *registry.Cache) (analysis.SynonymSource, error) {
	collectionName, isString := config["collection"].(string)
	if !isString {
		return nil, fmt.Errorf("must specify collection")
	}

	analyzerName, isString := config["analyzer"].(string)
	if !isString {
		return nil, fmt.Errorf("must specify analyzer")
	}

	// The analyzer itself is not used here; the lookup only verifies that it
	// can be obtained from the cache.
	_, lookupErr := cache.AnalyzerNamed(analyzerName)
	if lookupErr != nil {
		return nil, fmt.Errorf("analyzer named '%s' not found", analyzerName)
	}

	return NewSynonymSource(collectionName, analyzerName), nil
}
// init registers SynonymSourceConstructor with the registry under
// analysis.SynonymSourceType and panics if the registration fails.
func init() {
	if err := registry.RegisterSynonymSource(analysis.SynonymSourceType, SynonymSourceConstructor); err != nil {
		panic(err)
	}
}
================================================
FILE: mapping.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import "github.com/blevesearch/bleve/v2/mapping"
// NewIndexMapping creates a new IndexMapping that will use all the default
// indexing rules. It delegates to mapping.NewIndexMapping.
func NewIndexMapping() *mapping.IndexMappingImpl {
return mapping.NewIndexMapping()
}
// NewDocumentMapping returns a new document mapping
// with all the default values.
// It delegates to mapping.NewDocumentMapping.
func NewDocumentMapping() *mapping.DocumentMapping {
return mapping.NewDocumentMapping()
}
// NewDocumentStaticMapping returns a new document
// mapping that will not automatically index parts
// of a document without an explicit mapping.
// It delegates to mapping.NewDocumentStaticMapping.
func NewDocumentStaticMapping() *mapping.DocumentMapping {
return mapping.NewDocumentStaticMapping()
}
// NewNestedDocumentMapping returns a new document mapping
// that will treat all objects as nested documents.
// It delegates to mapping.NewNestedDocumentMapping.
func NewNestedDocumentMapping() *mapping.DocumentMapping {
return mapping.NewNestedDocumentMapping()
}
// NewNestedDocumentStaticMapping returns a new document mapping
// that will treat all objects as nested documents and
// will not automatically index parts of a nested document
// without an explicit mapping.
// It delegates to mapping.NewNestedDocumentStaticMapping.
func NewNestedDocumentStaticMapping() *mapping.DocumentMapping {
return mapping.NewNestedDocumentStaticMapping()
}
// NewDocumentDisabledMapping returns a new document
// mapping that will not perform any indexing.
// It delegates to mapping.NewDocumentDisabledMapping.
func NewDocumentDisabledMapping() *mapping.DocumentMapping {
return mapping.NewDocumentDisabledMapping()
}
// NewTextFieldMapping returns a default field mapping for text.
// It delegates to mapping.NewTextFieldMapping.
func NewTextFieldMapping() *mapping.FieldMapping {
return mapping.NewTextFieldMapping()
}
// NewKeywordFieldMapping returns a field mapping for text using the keyword
// analyzer, which essentially doesn't apply any specific text analysis.
// It delegates to mapping.NewKeywordFieldMapping.
func NewKeywordFieldMapping() *mapping.FieldMapping {
return mapping.NewKeywordFieldMapping()
}
// NewNumericFieldMapping returns a default field mapping for numbers.
// It delegates to mapping.NewNumericFieldMapping.
func NewNumericFieldMapping() *mapping.FieldMapping {
return mapping.NewNumericFieldMapping()
}
// NewDateTimeFieldMapping returns a default field mapping for dates.
// It delegates to mapping.NewDateTimeFieldMapping.
func NewDateTimeFieldMapping() *mapping.FieldMapping {
return mapping.NewDateTimeFieldMapping()
}
// NewBooleanFieldMapping returns a default field mapping for booleans.
// It delegates to mapping.NewBooleanFieldMapping.
func NewBooleanFieldMapping() *mapping.FieldMapping {
return mapping.NewBooleanFieldMapping()
}
// NewGeoPointFieldMapping returns the field mapping created by
// mapping.NewGeoPointFieldMapping.
// NOTE(review): presumably the default mapping for geo point fields —
// confirm against the mapping package.
func NewGeoPointFieldMapping() *mapping.FieldMapping {
return mapping.NewGeoPointFieldMapping()
}
// NewGeoShapeFieldMapping returns the field mapping created by
// mapping.NewGeoShapeFieldMapping.
// NOTE(review): presumably the default mapping for geo shape fields —
// confirm against the mapping package.
func NewGeoShapeFieldMapping() *mapping.FieldMapping {
return mapping.NewGeoShapeFieldMapping()
}
// NewIPFieldMapping returns the field mapping created by
// mapping.NewIPFieldMapping.
// NOTE(review): presumably the default mapping for IP address fields —
// confirm against the mapping package.
func NewIPFieldMapping() *mapping.FieldMapping {
return mapping.NewIPFieldMapping()
}
================================================
FILE: mapping_vector.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package bleve
import "github.com/blevesearch/bleve/v2/mapping"
// NewVectorFieldMapping returns the field mapping created by
// mapping.NewVectorFieldMapping. This file is only compiled with the
// "vectors" build tag.
func NewVectorFieldMapping() *mapping.FieldMapping {
return mapping.NewVectorFieldMapping()
}
// NewVectorBase64FieldMapping returns the field mapping created by
// mapping.NewVectorBase64FieldMapping. This file is only compiled with the
// "vectors" build tag.
func NewVectorBase64FieldMapping() *mapping.FieldMapping {
return mapping.NewVectorBase64FieldMapping()
}
================================================
FILE: numeric/bin.go
================================================
package numeric
// interleaveMagic holds the bit masks used by Interleave and Deinterleave.
// Entries 0 through 5 keep alternating groups of 1, 2, 4, 8, 16 and 32 bits,
// starting with the lowest group; entry 6 is the bitwise complement of
// entry 0 (the odd bit positions).
var interleaveMagic = []uint64{
0x5555555555555555,
0x3333333333333333,
0x0F0F0F0F0F0F0F0F,
0x00FF00FF00FF00FF,
0x0000FFFF0000FFFF,
0x00000000FFFFFFFF,
0xAAAAAAAAAAAAAAAA,
}
// interleaveShift holds the shift distances used by Interleave and
// Deinterleave; entry i is half the group width kept by interleaveMagic[i+1].
var interleaveShift = []uint{1, 2, 4, 8, 16}
// Interleave the first 32 bits of each uint64: the bits of v1 end up in the
// even bit positions of the result and the bits of v2 in the odd ones.
//
// adapted from org.apache.lucene.util.BitUtil
// which was adapted from:
// http://graphics.stanford.edu/~seander/bithacks.html#InterleaveBMN
func Interleave(v1, v2 uint64) uint64 {
	// Spread both inputs in lock-step, from the widest shift to the
	// narrowest, so that afterwards each original bit i sits at bit 2*i.
	for step := 4; step >= 0; step-- {
		shift, mask := interleaveShift[step], interleaveMagic[step]
		v1 = (v1 | v1<<shift) & mask
		v2 = (v2 | v2<<shift) & mask
	}
	return v2<<1 | v1
}
// Deinterleave the 32-bit value starting at position 0
// to get the other 32-bit value, shift it by 1 first
func Deinterleave(b uint64) uint64 {
	// keep only the even bit positions, then repeatedly fold neighbouring
	// groups together until the 32 bits are contiguous in the low half
	b &= interleaveMagic[0]
	for step, shift := range interleaveShift {
		b ^= b >> shift
		b &= interleaveMagic[step+1]
	}
	return b
}
================================================
FILE: numeric/bin_test.go
================================================
package numeric
import "testing"
// TestInterleaveDeinterleave checks that both inputs of Interleave can be
// recovered with Deinterleave (the second one after shifting the interleaved
// value right by one bit).
func TestInterleaveDeinterleave(t *testing.T) {
	pairs := [][2]uint64{
		{0, 0},
		{1, 1},
		{27, 39},
		{1<<32 - 1, 1<<32 - 1}, // largest that should still work
	}

	for _, pair := range pairs {
		wantV1, wantV2 := pair[0], pair[1]
		interleaved := Interleave(wantV1, wantV2)

		if gotV1 := Deinterleave(interleaved); gotV1 != wantV1 {
			t.Errorf("expected v1: %d, got %d, interleaved was %x", wantV1, gotV1, interleaved)
		}
		if gotV2 := Deinterleave(interleaved >> 1); gotV2 != wantV2 {
			t.Errorf("expected v2: %d, got %d, interleaved was %x", wantV2, gotV2, interleaved)
		}
	}
}
================================================
FILE: numeric/float.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package numeric
import (
"math"
)
// Float64ToInt64 maps f to an int64 built from its IEEE 754 bit pattern.
// Patterns with the sign bit set have their remaining 63 bits inverted;
// all other patterns are returned unchanged.
func Float64ToInt64(f float64) int64 {
	bits := int64(math.Float64bits(f))
	if bits >= 0 {
		return bits
	}
	return bits ^ 0x7fffffffffffffff
}
// Int64ToFloat64 reverses Float64ToInt64: negative inputs have their lower
// 63 bits inverted again before the value is reinterpreted as a float64 bit
// pattern.
func Int64ToFloat64(i int64) float64 {
	bits := i
	if bits < 0 {
		bits = bits ^ 0x7fffffffffffffff
	}
	return math.Float64frombits(uint64(bits))
}
================================================
FILE: numeric/float_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package numeric
import (
"testing"
)
// TestSortabledFloat64ToInt64 checks that the float/sortable-int conversions
// round-trip, and that the resulting integers sort in the same order as the
// floats they were derived from.
func TestSortabledFloat64ToInt64(t *testing.T) {
	// listed in strictly ascending order
	inputs := []float64{
		-4640094584139352638,
		-167.42,
		-1.11,
		0,
		3.14,
		167.42,
	}

	var (
		prev    int64
		hasPrev bool
	)
	for _, input := range inputs {
		sortable := Float64ToInt64(input)
		// every value after the first must sort above its predecessor
		if hasPrev && sortable <= prev {
			t.Errorf("expected greater than prev, this: %d, last %d", sortable, prev)
		}
		prev, hasPrev = sortable, true

		// converting back must yield the original float
		if roundTrip := Int64ToFloat64(sortable); roundTrip != input {
			t.Errorf("expected %f, got %f", input, roundTrip)
		}
	}
}
================================================
FILE: numeric/prefix_coded.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package numeric
import "fmt"
// ShiftStartInt64 is the base value of the first byte of a PrefixCoded
// int64: that byte is ShiftStartInt64 plus the shift amount.
const ShiftStartInt64 byte = 0x20
// PrefixCoded is a byte array encoding of
// 64-bit numeric values shifted by 0-63 bits.
// The first byte is ShiftStartInt64 plus the shift; the remaining bytes each
// carry 7 bits of the shifted value, most significant group first.
type PrefixCoded []byte
// NewPrefixCodedInt64 encodes in, shifted right by shift bits, as a
// PrefixCoded value. It calls NewPrefixCodedInt64Prealloc without a
// preallocated buffer and returns its result and error.
func NewPrefixCodedInt64(in int64, shift uint) (PrefixCoded, error) {
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, nil)
return rv, err
}
// NewPrefixCodedInt64Prealloc encodes in, shifted right by shift bits, as a
// PrefixCoded value, carving the result out of prealloc when it is large
// enough.
//
// When prealloc has room, rv aliases its leading bytes and preallocRest is
// the unused remainder. When it does not, rv is freshly allocated and
// preallocRest is nil. A shift above 63 yields an error together with the
// untouched prealloc.
func NewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) (
	rv PrefixCoded, preallocRest []byte, err error) {
	if shift > 63 {
		return nil, prealloc, fmt.Errorf("cannot shift %d, must be between 0 and 63", shift)
	}

	// one payload byte per started group of 7 remaining bits, plus the
	// leading shift byte
	payloadLen := (63-shift)/7 + 1
	total := int(payloadLen + 1)
	if len(prealloc) < total {
		rv = make(PrefixCoded, total)
	} else {
		rv, preallocRest = PrefixCoded(prealloc[:total]), prealloc[total:]
	}

	rv[0] = ShiftStartInt64 + byte(shift)

	// flip the sign bit, then drop the shifted-out low bits
	bits := (uint64(in) ^ 0x8000000000000000) >> shift
	for pos := payloadLen; pos > 0; pos-- {
		// Store 7 bits per byte for compatibility
		// with UTF-8 encoding of terms
		rv[pos] = byte(bits & 0x7f)
		bits >>= 7
	}
	return rv, preallocRest, nil
}
// MustNewPrefixCodedInt64 is like NewPrefixCodedInt64 but panics with the
// error instead of returning it.
func MustNewPrefixCodedInt64(in int64, shift uint) PrefixCoded {
rv, err := NewPrefixCodedInt64(in, shift)
if err != nil {
panic(err)
}
return rv
}
// MustNewPrefixCodedInt64Prealloc is like NewPrefixCodedInt64Prealloc but
// panics with the error instead of returning it. The unused remainder of
// prealloc is discarded, not returned.
func MustNewPrefixCodedInt64Prealloc(in int64, shift uint, prealloc []byte) PrefixCoded {
rv, _, err := NewPrefixCodedInt64Prealloc(in, shift, prealloc)
if err != nil {
panic(err)
}
return rv
}
// Shift returns the number of bits the encoded value was shifted by, decoded
// from the leading byte (first byte minus ShiftStartInt64).
//
// An error is returned when p is empty or when the leading byte does not
// encode a shift in the range 0-63, the same range accepted by
// NewPrefixCodedInt64Prealloc and ValidPrefixCodedTermBytes.
func (p PrefixCoded) Shift() (uint, error) {
	if len(p) > 0 {
		// shift is an unsigned byte, so a leading byte below ShiftStartInt64
		// wraps around to a value above 63 and is rejected by the same check.
		shift := p[0] - ShiftStartInt64
		if shift <= 63 {
			return uint(shift), nil
		}
	}
	return 0, fmt.Errorf("invalid prefix coded value")
}
// Int64 decodes p back into an int64.
//
// The 7-bit payload groups following the first byte are reassembled, shifted
// left by the amount reported by Shift, and the sign bit is flipped back. Any
// error from Shift is returned unchanged together with 0.
func (p PrefixCoded) Int64() (int64, error) {
	shift, err := p.Shift()
	if err != nil {
		return 0, err
	}

	var bits uint64
	for _, group := range p[1:] {
		bits = bits<<7 | uint64(group)
	}
	return int64((bits << shift) ^ 0x8000000000000000), nil
}
// ValidPrefixCodedTerm is the string variant of ValidPrefixCodedTermBytes:
// it converts p to a byte slice and returns that function's result.
func ValidPrefixCodedTerm(p string) (bool, int) {
return ValidPrefixCodedTermBytes([]byte(p))
}
// ValidPrefixCodedTermBytes reports whether p has the layout of a prefix
// coded int64 and, if so, the shift encoded in its first byte.
//
// p is valid when its first byte lies in the range ShiftStartInt64 to
// ShiftStartInt64+63 and its length matches the number of 7-bit groups
// required for that shift plus the leading byte. Invalid input, including an
// empty slice, yields (false, 0).
func ValidPrefixCodedTermBytes(p []byte) (bool, int) {
	if len(p) == 0 {
		return false, 0
	}

	lead := p[0]
	if lead < ShiftStartInt64 || lead > ShiftStartInt64+63 {
		return false, 0
	}

	shift := int(lead - ShiftStartInt64)
	wantLen := (63-shift)/7 + 2
	if len(p) != wantLen {
		return false, 0
	}
	return true, shift
}
================================================
FILE: numeric/prefix_coded_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package numeric
import (
"reflect"
"testing"
)
// tests is the table of inputs and expected encodings shared by
// TestPrefixCoded, TestPrefixCodedValid and BenchmarkTestPrefixCoded.
// Each entry gives an int64, the shift to encode it with, and the expected
// PrefixCoded bytes.
var tests = []struct {
input int64
shift uint
output PrefixCoded
}{
{
input: 1,
shift: 0,
output: PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
},
{
input: -1,
shift: 0,
output: PrefixCoded{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f},
},
{
input: -94582,
shift: 0,
output: PrefixCoded{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7a, 0x1d, 0xa},
},
{
input: 314729851,
shift: 0,
output: PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x16, 0x9, 0x4a, 0x7b},
},
{
input: 314729851,
shift: 4,
output: PrefixCoded{0x24, 0x8, 0x0, 0x0, 0x0, 0x0, 0x9, 0x30, 0x4c, 0x57},
},
{
input: 314729851,
shift: 8,
output: PrefixCoded{0x28, 0x40, 0x0, 0x0, 0x0, 0x0, 0x4b, 0x4, 0x65},
},
{
input: 314729851,
shift: 16,
output: PrefixCoded{0x30, 0x20, 0x0, 0x0, 0x0, 0x0, 0x25, 0x42},
},
{
input: 314729851,
shift: 32,
output: PrefixCoded{0x40, 0x8, 0x0, 0x0, 0x0, 0x0},
},
{
// differs from the previous entry only in bits removed by the shift,
// so the expected encoding is the same
input: 1234729851,
shift: 32,
output: PrefixCoded{0x40, 0x8, 0x0, 0x0, 0x0, 0x0},
},
}
// these array encoding values have been verified manually
// against the lucene implementation
//
// TestPrefixCoded encodes every entry of the shared tests table and checks
// the produced bytes and the shift read back from them; entries encoded with
// shift 0 must additionally decode to the original input.
func TestPrefixCoded(t *testing.T) {
	for i := range tests {
		tc := &tests[i]

		encoded, err := NewPrefixCodedInt64(tc.input, tc.shift)
		if err != nil {
			t.Error(err)
		}
		if !reflect.DeepEqual(encoded, tc.output) {
			t.Errorf("expected %#v, got %#v", tc.output, encoded)
		}

		gotShift, err := encoded.Shift()
		if err != nil {
			t.Error(err)
		}
		if gotShift != tc.shift {
			t.Errorf("expected %d, got %d", tc.shift, gotShift)
		}

		// only an unshifted encoding keeps every bit of the original value
		if tc.shift != 0 {
			continue
		}
		decoded, err := encoded.Int64()
		if err != nil {
			t.Error(err)
		}
		if decoded != tc.input {
			t.Errorf("expected %v, got %v", tc.input, decoded)
		}
	}
}
// TestPrefixCodedValid checks ValidPrefixCodedTerm against the encodings of
// the shared tests table (all valid) and against a set of malformed values
// (all invalid).
func TestPrefixCodedValid(t *testing.T) {
// all of the shared tests should be valid
for _, test := range tests {
valid, _ := ValidPrefixCodedTerm(string(test.output))
if !valid {
t.Errorf("expected %s to be valid prefix coded, is not", string(test.output))
}
}
invalidTests := []struct {
data PrefixCoded
}{
// first byte invalid skip (too low)
{
data: PrefixCoded{0x19, 'c', 'a', 't'},
},
// first byte invalid skip (too high)
{
data: PrefixCoded{0x20 + 64, 'c'},
},
// length of trailing bytes wrong (too long)
{
data: PrefixCoded{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
},
// length of trailing bytes wrong (too short)
{
data: PrefixCoded{0x20 + 63},
},
}
// none of the malformed values above should be reported as valid
for _, test := range invalidTests {
valid, _ := ValidPrefixCodedTerm(string(test.data))
if valid {
t.Errorf("expected %s to be invalid prefix coded, it is", string(test.data))
}
}
}
// BenchmarkTestPrefixCoded measures encoding every entry of the shared tests
// table with NewPrefixCodedInt64. Each result is also compared with the
// expected bytes, so the comparison is part of the measured work.
func BenchmarkTestPrefixCoded(b *testing.B) {
for i := 0; i < b.N; i++ {
for _, test := range tests {
actual, err := NewPrefixCodedInt64(test.input, test.shift)
if err != nil {
b.Error(err)
}
if !reflect.DeepEqual(actual, test.output) {
b.Errorf("expected %#v, got %#v", test.output, actual)
}
}
}
}
================================================
FILE: pre_search.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"github.com/blevesearch/bleve/v2/search"
)
// A preSearchResultProcessor processes the data in
// the preSearch result from multiple
// indexes in an alias and merges them together to
// create the final preSearch result
type preSearchResultProcessor interface {
// adds the preSearch result to the processor; the string argument is
// named indexName by the implementations in this file
add(*SearchResult, string)
// updates the final search result with the finalized
// data from the processor
finalize(*SearchResult)
}
// -----------------------------------------------------------------------------
// knnPreSearchResultProcessor is the preSearchResultProcessor for KNN
// presearch results. Its behaviour is supplied through the two function
// fields; a nil function turns the corresponding method into a no-op.
type knnPreSearchResultProcessor struct {
// addFn is invoked by add with the same arguments.
addFn func(sr *SearchResult, indexName string)
// finalizeFn is invoked by finalize with the same argument.
finalizeFn func(sr *SearchResult)
}
// add hands sr and indexName to addFn; it does nothing when addFn is nil.
func (k *knnPreSearchResultProcessor) add(sr *SearchResult, indexName string) {
	if k.addFn == nil {
		return
	}
	k.addFn(sr, indexName)
}
// finalize hands sr to finalizeFn; it does nothing when finalizeFn is nil.
func (k *knnPreSearchResultProcessor) finalize(sr *SearchResult) {
	if k.finalizeFn == nil {
		return
	}
	k.finalizeFn(sr)
}
// -----------------------------------------------------------------------------
// synonymPreSearchResultProcessor is the preSearchResultProcessor for synonym
// presearch results.
type synonymPreSearchResultProcessor struct {
// finalizedFts accumulates the SynonymResult of every search result passed
// to add; it stays nil until the first non-nil SynonymResult is seen.
finalizedFts search.FieldTermSynonymMap
}
// newSynonymPreSearchResultProcessor returns an empty
// synonymPreSearchResultProcessor (finalizedFts is nil).
func newSynonymPreSearchResultProcessor() *synonymPreSearchResultProcessor {
return &synonymPreSearchResultProcessor{}
}
// add folds the SynonymResult of sr into the accumulated synonym map.
// Results without a SynonymResult are ignored; indexName is not used.
func (s *synonymPreSearchResultProcessor) add(sr *SearchResult, indexName string) {
	incoming := sr.SynonymResult
	if incoming == nil {
		return
	}
	if s.finalizedFts != nil {
		// later results are merged into the map adopted from the first one
		s.finalizedFts.MergeWith(incoming)
		return
	}
	// the first non-nil result becomes the accumulator itself
	s.finalizedFts = incoming
}
// finalize stores the accumulated synonym map in sr.SynonymResult. When
// nothing was accumulated, sr is left untouched.
func (s *synonymPreSearchResultProcessor) finalize(sr *SearchResult) {
	if s.finalizedFts == nil {
		return
	}
	sr.SynonymResult = s.finalizedFts
}
// bm25PreSearchResultProcessor is the preSearchResultProcessor that sums the
// BM25 statistics of the search results passed to add.
type bm25PreSearchResultProcessor struct {
docCount float64 // bm25 specific stats
// fieldCardinality is the per-field sum of BM25Stats.FieldCardinality.
fieldCardinality map[string]int
}
// newBM25PreSearchResultProcessor returns a bm25PreSearchResultProcessor with
// a zero docCount and an initialized, empty fieldCardinality map.
func newBM25PreSearchResultProcessor() *bm25PreSearchResultProcessor {
return &bm25PreSearchResultProcessor{
fieldCardinality: make(map[string]int),
}
}
// add accumulates the BM25 statistics of sr: DocCount is added to the running
// document count and every field cardinality to the running per-field total.
// Results without BM25Stats are ignored; indexName is not used.
//
// TODO How will this work for queries other than term queries?
func (b *bm25PreSearchResultProcessor) add(sr *SearchResult, indexName string) {
	stats := sr.BM25Stats
	if stats == nil {
		return
	}
	b.docCount += stats.DocCount
	for field, cardinality := range stats.FieldCardinality {
		b.fieldCardinality[field] += cardinality
	}
}
// finalize replaces sr.BM25Stats with a new value carrying the accumulated
// document count and field cardinalities. The cardinality map is shared with
// the processor, not copied.
func (b *bm25PreSearchResultProcessor) finalize(sr *SearchResult) {
	merged := new(search.BM25Stats)
	merged.DocCount = b.docCount
	merged.FieldCardinality = b.fieldCardinality
	sr.BM25Stats = merged
}
// -----------------------------------------------------------------------------
// compositePreSearchResultProcessor is a preSearchResultProcessor that holds
// any number of other presearch result processors and forwards every call to
// each of them, in slice order.
type compositePreSearchResultProcessor struct {
presearchResultProcessors []preSearchResultProcessor
}
// add passes sr and indexName to the add method of every wrapped processor,
// in slice order.
func (m *compositePreSearchResultProcessor) add(sr *SearchResult, indexName string) {
	for i := range m.presearchResultProcessors {
		m.presearchResultProcessors[i].add(sr, indexName)
	}
}
// finalize passes sr to the finalize method of every wrapped processor, in
// slice order.
func (m *compositePreSearchResultProcessor) finalize(sr *SearchResult) {
	for i := range m.presearchResultProcessors {
		m.presearchResultProcessors[i].finalize(sr)
	}
}
// -----------------------------------------------------------------------------
// createPreSearchResultProcessor assembles the preSearchResultProcessor
// needed for req according to flags.
//
// One processor is created for each enabled flag (knn, synonyms, bm25). The
// result is nil when req or flags is nil or when no processor was created,
// the processor itself when exactly one was created, and a
// compositePreSearchResultProcessor wrapping all of them otherwise.
func createPreSearchResultProcessor(req *SearchRequest, flags *preSearchFlags) preSearchResultProcessor {
	// return nil for invalid input
	if req == nil || flags == nil {
		return nil
	}

	var active []preSearchResultProcessor

	// KNN processor, if the request has KNN
	if flags.knn {
		if p := newKnnPreSearchResultProcessor(req); p != nil {
			active = append(active, p)
		}
	}
	// Synonym processor, if the request has synonyms
	if flags.synonyms {
		if p := newSynonymPreSearchResultProcessor(); p != nil {
			active = append(active, p)
		}
	}
	// BM25 processor, if the request needs BM25 statistics
	if flags.bm25 {
		if p := newBM25PreSearchResultProcessor(); p != nil {
			active = append(active, p)
		}
	}

	// A single processor is returned directly, which avoids the composite
	// wrapper in the common case.
	if len(active) == 0 {
		return nil
	}
	if len(active) == 1 {
		return active[0]
	}
	return &compositePreSearchResultProcessor{
		presearchResultProcessors: active,
	}
}
================================================
FILE: query.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"time"
"github.com/blevesearch/bleve/v2/search/query"
)
// NewBoolFieldQuery creates a new Query for boolean fields.
// val is passed unchanged to query.NewBoolFieldQuery, which builds the query.
func NewBoolFieldQuery(val bool) *query.BoolFieldQuery {
return query.NewBoolFieldQuery(val)
}
// NewBooleanQuery creates a compound Query composed
// of several other Query objects.
// The query is created by passing nil for every argument of
// query.NewBooleanQuery; the other query objects are added afterwards using
// the AddMust(), AddShould() and AddMustNot() methods.
// Result documents must satisfy ALL of the
// must Queries.
// Result documents must satisfy NONE of the must not
// Queries.
// Result documents that ALSO satisfy any of the should
// Queries will score higher.
func NewBooleanQuery() *query.BooleanQuery {
return query.NewBooleanQuery(nil, nil, nil)
}
// NewConjunctionQuery creates a new compound Query.
// Result documents must satisfy all of the queries.
// conjuncts lists the queries to combine; the variadic arguments are passed
// on to query.NewConjunctionQuery as a single slice.
func NewConjunctionQuery(conjuncts ...query.Query) *query.ConjunctionQuery {
return query.NewConjunctionQuery(conjuncts)
}
// NewDateRangeQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser.
// Either, but not both endpoints can be nil.
//
// NOTE(review): start and end are time.Time values here, so this constructor
// parses no date string itself and neither argument can literally be nil;
// "nil" presumably refers to the zero time.Time — confirm against
// query.NewDateRangeQuery.
func NewDateRangeQuery(start, end time.Time) *query.DateRangeQuery {
return query.NewDateRangeQuery(start, end)
}
// NewDateRangeInclusiveQuery creates a new Query for ranges
// of date values.
// Date strings are parsed using the DateTimeParser configured in the
// top-level config.QueryDateTimeParser.
// Either, but not both endpoints can be nil.
// startInclusive and endInclusive control inclusion of the endpoints.
//
// NOTE(review): start and end are time.Time values here, so this constructor
// parses no date string itself and neither endpoint can literally be nil;
// "nil" presumably refers to the zero time.Time — confirm against
// query.NewDateRangeInclusiveQuery. What a nil startInclusive or endInclusive
// pointer selects is decided by that function as well.
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *query.DateRangeQuery {
return query.NewDateRangeInclusiveQuery(start, end, startInclusive, endInclusive)
}
// NewDateRangeStringQuery creates a new Query for ranges
// of date values.
// start and end are date strings; they are parsed using the DateTimeParser
// set using the DateRangeStringQuery.SetDateTimeParser() method.
// If no DateTimeParser is set, then the top-level
// config.QueryDateTimeParser is used.
func NewDateRangeStringQuery(start, end string) *query.DateRangeStringQuery {
return query.NewDateRangeStringQuery(start, end)
}
// NewDateRangeInclusiveStringQuery creates a new Query for ranges
// of date values.
// start and end are date strings; they are parsed using the DateTimeParser
// set using the DateRangeStringQuery.SetDateTimeParser() method.
// This DateTimeParser is a custom date time parser defined in the index
// mapping, using the AddCustomDateTimeParser() method.
// If no DateTimeParser is set, then the top-level
// config.QueryDateTimeParser is used.
// Either, but not both endpoints can be nil.
// startInclusive and endInclusive control inclusion of the endpoints.
//
// Note that the underlying constructor is named
// query.NewDateRangeStringInclusiveQuery (word order differs from this
// wrapper's name).
// NOTE(review): start and end are plain strings, so "nil" presumably means
// the empty string — confirm against the query package.
func NewDateRangeInclusiveStringQuery(start, end string, startInclusive, endInclusive *bool) *query.DateRangeStringQuery {
return query.NewDateRangeStringInclusiveQuery(start, end, startInclusive, endInclusive)
}
// NewDisjunctionQuery creates a new compound Query.
// Result documents satisfy at least one Query.
// disjuncts lists the alternative queries; the variadic arguments are passed
// on to query.NewDisjunctionQuery as a single slice.
func NewDisjunctionQuery(disjuncts ...query.Query) *query.DisjunctionQuery {
return query.NewDisjunctionQuery(disjuncts)
}
// NewDocIDQuery creates a new Query object returning indexed documents among
// the specified set. Combine it with ConjunctionQuery to restrict the scope of
// other queries output.
// ids is the set of document IDs and is passed as-is to query.NewDocIDQuery.
func NewDocIDQuery(ids []string) *query.DocIDQuery {
return query.NewDocIDQuery(ids)
}
// NewFuzzyQuery creates a new Query which finds
// documents containing terms within a specific
// fuzziness of the specified term.
// The default fuzziness is 1.
//
// The current implementation uses Levenshtein edit
// distance as the fuzziness metric.
//
// Only the term is supplied here; the query itself is built by
// query.NewFuzzyQuery.
func NewFuzzyQuery(term string) *query.FuzzyQuery {
return query.NewFuzzyQuery(term)
}
// NewMatchAllQuery creates a Query which will
// match all documents in the index.
// It takes no arguments and delegates to query.NewMatchAllQuery.
func NewMatchAllQuery() *query.MatchAllQuery {
return query.NewMatchAllQuery()
}
// NewMatchNoneQuery creates a Query which will not
// match any documents in the index.
// It takes no arguments and delegates to query.NewMatchNoneQuery.
func NewMatchNoneQuery() *query.MatchNoneQuery {
return query.NewMatchNoneQuery()
}
// NewMatchPhraseQuery creates a new Query object
// for matching phrases in the index.
// An Analyzer is chosen based on the field.
// Input text is analyzed using this analyzer.
// Token terms resulting from this analysis are
// used to build a search phrase. Result documents
// must match this phrase. Queried field must have been indexed with
// IncludeTermVectors set to true.
//
// matchPhrase is the input text; it is passed unchanged to
// query.NewMatchPhraseQuery.
func NewMatchPhraseQuery(matchPhrase string) *query.MatchPhraseQuery {
return query.NewMatchPhraseQuery(matchPhrase)
}
// NewMatchQuery creates a Query for matching text.
// An Analyzer is chosen based on the field.
// Input text is analyzed using this analyzer.
// Token terms resulting from this analysis are
// used to perform term searches. Result documents
// must satisfy at least one of these term searches.
//
// match is the input text; it is passed unchanged to query.NewMatchQuery.
func NewMatchQuery(match string) *query.MatchQuery {
return query.NewMatchQuery(match)
}
// NewNumericRangeQuery creates a new Query for ranges
// of numeric values.
// min and max are pointers so that an endpoint can be omitted:
// either, but not both endpoints can be nil.
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewNumericRangeQuery(min, max *float64) *query.NumericRangeQuery {
return query.NewNumericRangeQuery(min, max)
}
// NewNumericRangeInclusiveQuery creates a new Query for ranges
// of numeric values.
// Either, but not both endpoints can be nil.
// Control endpoint inclusion with minInclusive, maxInclusive.
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *query.NumericRangeQuery {
return query.NewNumericRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
// NewTermRangeQuery creates a new Query for ranges
// of text terms.
// An endpoint is omitted by passing the empty string:
// either, but not both endpoints can be "".
// The minimum value is inclusive.
// The maximum value is exclusive.
func NewTermRangeQuery(min, max string) *query.TermRangeQuery {
return query.NewTermRangeQuery(min, max)
}
// NewTermRangeInclusiveQuery creates a new Query for ranges
// of text terms.
// Either, but not both endpoints can be "".
// Control endpoint inclusion with minInclusive, maxInclusive.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *query.TermRangeQuery {
return query.NewTermRangeInclusiveQuery(min, max, minInclusive, maxInclusive)
}
// NewPhraseQuery creates a new Query for finding
// exact term phrases in the index.
// The provided terms must exist in the correct
// order, at the correct index offsets, in the
// specified field. Queried field must have been indexed with
// IncludeTermVectors set to true.
//
// Unlike most constructors in this file, the field is supplied up front as an
// argument.
func NewPhraseQuery(terms []string, field string) *query.PhraseQuery {
return query.NewPhraseQuery(terms, field)
}
// NewPrefixQuery creates a new Query which finds
// documents containing terms that start with the
// specified prefix.
// prefix is passed unchanged to query.NewPrefixQuery.
func NewPrefixQuery(prefix string) *query.PrefixQuery {
return query.NewPrefixQuery(prefix)
}
// NewRegexpQuery creates a new Query which finds
// documents containing terms that match the
// specified regular expression.
// regexp is the pattern text; this wrapper does not compile or validate it
// before passing it to query.NewRegexpQuery.
func NewRegexpQuery(regexp string) *query.RegexpQuery {
return query.NewRegexpQuery(regexp)
}
// NewQueryStringQuery creates a new Query used for
// finding documents that satisfy a query string. The
// query string is a small query language for humans.
// q is the query string; this wrapper does not parse it before passing it to
// query.NewQueryStringQuery.
func NewQueryStringQuery(q string) *query.QueryStringQuery {
return query.NewQueryStringQuery(q)
}
// NewTermQuery creates a new Query for finding an
// exact term match in the index.
// term is passed unchanged to query.NewTermQuery.
func NewTermQuery(term string) *query.TermQuery {
return query.NewTermQuery(term)
}
// NewWildcardQuery creates a new Query which finds
// documents containing terms that match the
// specified wildcard. In the wildcard pattern '*'
// will match any sequence of 0 or more characters,
// and '?' will match any single character.
// wildcard is the pattern and is passed unchanged to query.NewWildcardQuery.
func NewWildcardQuery(wildcard string) *query.WildcardQuery {
return query.NewWildcardQuery(wildcard)
}
// NewGeoBoundingBoxQuery creates a new Query for performing geo bounding
// box searches. The arguments describe the position of the box and documents
// which have an indexed geo point inside the box will be returned.
// The box is given by its top-left and bottom-right corners; for each corner
// the longitude comes before the latitude.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *query.GeoBoundingBoxQuery {
return query.NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat)
}
// NewGeoDistanceQuery creates a new Query for performing geo distance
// searches. The arguments describe a position and a distance. Documents
// which have an indexed geo point which is less than or equal to the provided
// distance from the given position will be returned.
// The position is given as longitude first, then latitude; distance is a
// string that is passed unchanged to query.NewGeoDistanceQuery.
func NewGeoDistanceQuery(lon, lat float64, distance string) *query.GeoDistanceQuery {
return query.NewGeoDistanceQuery(lon, lat, distance)
}
// NewIPRangeQuery creates a new Query for matching IP addresses.
// If the argument is in CIDR format, then the query will match all
// IP addresses in the network specified. If the argument is an IP address,
// then the query will return documents which contain that IP.
// Both ipv4 and ipv6 are supported.
// Despite its name, cidr may therefore hold either a CIDR block or a single
// address; it is passed unchanged to query.NewIPRangeQuery.
func NewIPRangeQuery(cidr string) *query.IPRangeQuery {
return query.NewIPRangeQuery(cidr)
}
// NewGeoShapeQuery creates a new Query for matching the given geo shape.
// This method can be used for creating geoshape queries for shape types
// like: point, linestring, polygon, multipoint, multilinestring,
// multipolygon and envelope.
// typ names the shape type and relation the spatial relation to test; both,
// together with coordinates, are passed unchanged to query.NewGeoShapeQuery,
// whose query and error results are returned as-is.
func NewGeoShapeQuery(coordinates [][][][]float64, typ, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeQuery(coordinates, typ, relation)
}
// NewGeoShapeCircleQuery creates a new query for a geoshape that is a
// circle given center point and the radius. Radius formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined,
// the entire string is parsed and the unit of meters is assumed.
// coordinates holds the center point. The query and error results of
// query.NewGeoShapeCircleQuery are returned as-is.
func NewGeoShapeCircleQuery(coordinates []float64, radius, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeoShapeCircleQuery(coordinates, radius, relation)
}
// NewGeometryCollectionQuery creates a new query for the provided
// geometrycollection coordinates and types, which could contain
// multiple geo shapes.
// The query and error results of query.NewGeometryCollectionQuery are
// returned as-is.
// NOTE(review): coordinates and types presumably run in parallel, one entry
// per shape — confirm against the query package.
func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string, relation string) (*query.GeoShapeQuery, error) {
return query.NewGeometryCollectionQuery(coordinates, types, relation)
}
================================================
FILE: query_bench_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"strconv"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/mapping"
)
// BenchmarkQueryTerm measures term queries on a "text" field mapped with the
// keyword analyzer. The index holds 100 documents cycling through five
// values; each iteration searches for one of those values.
func BenchmarkQueryTerm(b *testing.B) {
	indexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, indexPath)

	// Map "text" with the keyword analyzer via the default document mapping.
	textField := mapping.NewTextFieldMapping()
	textField.Analyzer = keyword.Name
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("text", textField)
	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	members := []string{"abc", "abcdef", "ghi", "jkl", "jklmno"}
	for docNum := 0; docNum < 100; docNum++ {
		doc := map[string]interface{}{"text": members[docNum%len(members)]}
		if indexErr := idx.Index(strconv.Itoa(docNum), doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	// Only the query loop below is timed.
	b.ReportAllocs()
	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		termQuery := NewTermQuery(members[n%len(members)])
		termQuery.SetField("text")
		if _, searchErr := idx.Search(NewSearchRequest(termQuery)); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
// BenchmarkQueryTermRange measures inclusive term range queries on a "text"
// field mapped with the keyword analyzer, over 100 documents cycling through
// five values.
//
// NOTE(review): the endpoints are members[i%3] and members[(i+2)%3], so for
// i%3 == 1 ("abcdef".."abc") and i%3 == 2 ("ghi".."abcdef") the minimum sorts
// after the maximum. Two of every three iterations therefore query an
// inverted range — confirm that this is what the benchmark is meant to time.
func BenchmarkQueryTermRange(b *testing.B) {
	indexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, indexPath)

	// Map "text" with the keyword analyzer via the default document mapping.
	textField := mapping.NewTextFieldMapping()
	textField.Analyzer = keyword.Name
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("text", textField)
	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	members := []string{"abc", "abcdef", "ghi", "jkl", "jklmno"}
	for docNum := 0; docNum < 100; docNum++ {
		doc := map[string]interface{}{"text": members[docNum%len(members)]}
		if indexErr := idx.Index(strconv.Itoa(docNum), doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	// Only the query loop below is timed.
	b.ReportAllocs()
	b.ResetTimer()

	inclusive := true
	// Endpoints are drawn from the first len(members)-2 values only.
	window := len(members) - 2
	for n := 0; n < b.N; n++ {
		lower := members[n%window]
		upper := members[(n+2)%window]
		rangeQuery := NewTermRangeInclusiveQuery(lower, upper, &inclusive, &inclusive)
		rangeQuery.SetField("text")
		if _, searchErr := idx.Search(NewSearchRequest(rangeQuery)); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
// BenchmarkQueryWildcard measures wildcard queries on a "text" field mapped
// with the keyword analyzer, over 100 documents cycling through five values.
// Iterations alternate between the patterns "ab*" and "jk*".
func BenchmarkQueryWildcard(b *testing.B) {
	indexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, indexPath)

	// Map "text" with the keyword analyzer via the default document mapping.
	textField := mapping.NewTextFieldMapping()
	textField.Analyzer = keyword.Name
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("text", textField)
	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	members := []string{"abc", "abcdef", "ghi", "jkl", "jklmno"}
	for docNum := 0; docNum < 100; docNum++ {
		doc := map[string]interface{}{"text": members[docNum%len(members)]}
		if indexErr := idx.Index(strconv.Itoa(docNum), doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	patterns := []string{"ab*", "jk*"}

	// Only the query loop below is timed.
	b.ReportAllocs()
	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		wildcardQuery := NewWildcardQuery(patterns[n%len(patterns)])
		wildcardQuery.SetField("text")
		if _, searchErr := idx.Search(NewSearchRequest(wildcardQuery)); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
// BenchmarkQueryNumericRange measures inclusive numeric range queries on a
// "number" field. The index holds 100 documents whose value equals their
// sequence number (0..99).
//
// NOTE(review): the endpoints are i%90 and (i+10)%90, so whenever i%90 >= 80
// the upper endpoint wraps around and is smaller than the lower one (e.g.
// 85..5) — confirm that timing such inverted ranges is intended.
func BenchmarkQueryNumericRange(b *testing.B) {
	indexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, indexPath)

	numberField := mapping.NewNumericFieldMapping()
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("number", numberField)
	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	for docNum := 0; docNum < 100; docNum++ {
		doc := map[string]interface{}{"number": docNum}
		if indexErr := idx.Index(strconv.Itoa(docNum), doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	// Only the query loop below is timed.
	b.ReportAllocs()
	b.ResetTimer()

	inclusive := true
	for n := 0; n < b.N; n++ {
		lower := float64(n % 90)
		upper := float64((n + 10) % 90)
		rangeQuery := NewNumericRangeInclusiveQuery(&lower, &upper, &inclusive, &inclusive)
		rangeQuery.SetField("number")
		if _, searchErr := idx.Search(NewSearchRequest(rangeQuery)); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
// BenchmarkQueryDateRange measures inclusive date range queries on a "date"
// field. The index holds 100 documents cycling through five timestamp
// strings one day apart.
//
// The range endpoints are parsed once, before the timer is reset, so the
// timed loop covers only query construction and the search itself, and a
// parse failure aborts the benchmark instead of silently yielding a zero
// time.Time.
//
// NOTE(review): the endpoints are times[i%3] and times[(i+2)%3], so for
// i%3 == 1 and i%3 == 2 the start lies after the end — confirm that timing
// such inverted ranges is intended.
func BenchmarkQueryDateRange(b *testing.B) {
	tmpIndexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, tmpIndexPath)

	fm := mapping.NewDateTimeFieldMapping()
	dmap := mapping.NewDocumentMapping()
	dmap.AddFieldMappingsAt("date", fm)
	imap := mapping.NewIndexMapping()
	imap.DefaultMapping = dmap

	idx, err := New(tmpIndexPath, imap)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		err = idx.Close()
		if err != nil {
			b.Fatal(err)
		}
	}()

	members := []string{
		"2022-11-16T18:45:45Z",
		"2022-11-17T18:45:45Z",
		"2022-11-18T18:45:45Z",
		"2022-11-19T18:45:45Z",
		"2022-11-20T18:45:45Z",
	}
	for i := 0; i < 100; i++ {
		if err = idx.Index(strconv.Itoa(i),
			map[string]interface{}{"date": members[i%len(members)]}); err != nil {
			b.Fatal(err)
		}
	}

	// Convert the timestamps up front: parsing is loop-invariant setup work
	// and must not be charged to the query being measured.
	const layout = "2006-01-02T15:04:05Z"
	times := make([]time.Time, len(members))
	for i, member := range members {
		parsed, parseErr := time.Parse(layout, member)
		if parseErr != nil {
			b.Fatal(parseErr)
		}
		times[i] = parsed
	}

	b.ReportAllocs()
	b.ResetTimer()

	inclusive := true
	for i := 0; i < b.N; i++ {
		start := times[i%(len(times)-2)]
		end := times[(i+2)%(len(times)-2)]
		q := NewDateRangeInclusiveQuery(start, end, &inclusive, &inclusive)
		q.SetField("date")
		req := NewSearchRequest(q)
		if _, err = idx.Search(req); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkQueryGeoDistance measures geo distance queries on a "geo" geopoint
// field. The index holds 100 documents cycling through five locations; each
// iteration searches within "1mi" of one of those same locations.
func BenchmarkQueryGeoDistance(b *testing.B) {
	indexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, indexPath)

	geoField := mapping.NewGeoPointFieldMapping()
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("geo", geoField)
	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	// Element 0 of each pair is used as the longitude and element 1 as the
	// latitude when the query is built below.
	points := [][]float64{
		{-121.96713072883645, 37.380331474621045},
		{-97.75518866579938, 30.38974491308761},
		{-0.08653451918110022, 51.51063984942306},
		{-2.230759791360498, 53.481514330841236},
		{77.59542326042589, 12.97215865921956},
	}
	for docNum := 0; docNum < 100; docNum++ {
		doc := map[string]interface{}{"geo": points[docNum%len(points)]}
		if indexErr := idx.Index(strconv.Itoa(docNum), doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	// Only the query loop below is timed.
	b.ReportAllocs()
	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		center := points[n%len(points)]
		distanceQuery := NewGeoDistanceQuery(center[0], center[1], "1mi")
		distanceQuery.SetField("geo")
		if _, searchErr := idx.Search(NewSearchRequest(distanceQuery)); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
// BenchmarkQueryGeoBoundingBox measures geo bounding box queries on a "geo"
// geopoint field. The index holds 100 documents cycling through five
// locations; the five query boxes each enclose the location at the same
// position in the list.
func BenchmarkQueryGeoBoundingBox(b *testing.B) {
	indexPath := createTmpIndexPath(b)
	defer cleanupTmpIndexPath(b, indexPath)

	geoField := mapping.NewGeoPointFieldMapping()
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("geo", geoField)
	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping = docMapping

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		b.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			b.Fatal(closeErr)
		}
	}()

	points := [][]float64{
		{-121.96713072883645, 37.380331474621045},
		{-97.75518866579938, 30.38974491308761},
		{-0.08653451918110022, 51.51063984942306},
		{-2.230759791360498, 53.481514330841236},
		{77.59542326042589, 12.97215865921956},
	}
	for docNum := 0; docNum < 100; docNum++ {
		doc := map[string]interface{}{"geo": points[docNum%len(points)]}
		if indexErr := idx.Index(strconv.Itoa(docNum), doc); indexErr != nil {
			b.Fatal(indexErr)
		}
	}

	// Each corner is a pair whose element 0 is passed as the longitude and
	// element 1 as the latitude.
	boxes := []struct {
		topLeft     []float64
		bottomRight []float64
	}{
		{
			topLeft:     []float64{-122.14424992609722, 37.49751487670511},
			bottomRight: []float64{-121.78076546622579, 37.26963069737202},
		},
		{
			topLeft:     []float64{-97.85362236226437, 30.473743975245725},
			bottomRight: []float64{-97.58691085968482, 30.285211697102895},
		},
		{
			topLeft:     []float64{-0.28538822102223094, 51.61106497119687},
			bottomRight: []float64{0.16776748108466677, 51.395702237541286},
		},
		{
			topLeft:     []float64{-2.373683904907921, 53.54371945714075},
			bottomRight: []float64{-2.134365533113197, 53.41788831720595},
		},
		{
			topLeft:     []float64{77.52617635172015, 13.037587208986437},
			bottomRight: []float64{77.66508989028102, 12.924426170584738},
		},
	}

	// Only the query loop below is timed.
	b.ReportAllocs()
	b.ResetTimer()

	for n := 0; n < b.N; n++ {
		box := boxes[n%len(boxes)]
		boxQuery := NewGeoBoundingBoxQuery(
			box.topLeft[0],
			box.topLeft[1],
			box.bottomRight[0],
			box.bottomRight[1],
		)
		boxQuery.SetField("geo")
		if _, searchErr := idx.Search(NewSearchRequest(boxQuery)); searchErr != nil {
			b.Fatal(searchErr)
		}
	}
}
================================================
FILE: registry/analyzer.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterAnalyzer stores constructor in the package-level analyzers table
// under name. It returns an error, leaving the existing entry untouched, when
// the name is already registered.
func RegisterAnalyzer(name string, constructor AnalyzerConstructor) error {
	if _, taken := analyzers[name]; taken {
		return fmt.Errorf("attempted to register duplicate analyzer named '%s'", name)
	}
	analyzers[name] = constructor
	return nil
}
// AnalyzerConstructor builds an analysis.Analyzer from a configuration map.
// The cache argument is supplied by the caller of the constructor.
type AnalyzerConstructor func(config map[string]interface{}, cache *Cache) (analysis.Analyzer, error)
// AnalyzerRegistry maps names to AnalyzerConstructor values.
type AnalyzerRegistry map[string]AnalyzerConstructor
// AnalyzerCache embeds a ConcurrentCache; its AnalyzerNamed and DefineAnalyzer
// methods use that cache to hold analyzers built by AnalyzerBuild.
type AnalyzerCache struct {
*ConcurrentCache
}
// NewAnalyzerCache returns an AnalyzerCache backed by a freshly created
// ConcurrentCache.
func NewAnalyzerCache() *AnalyzerCache {
	return &AnalyzerCache{ConcurrentCache: NewConcurrentCache()}
}
// AnalyzerBuild looks up the constructor registered under name in the
// package-level analyzers table and invokes it with config and cache.
//
// It returns an error when no constructor is registered under name, or when
// the constructor itself fails. In the latter case the constructor's error is
// wrapped with %w, so the message is unchanged while the cause remains
// reachable through errors.Is / errors.As / errors.Unwrap.
func AnalyzerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := analyzers[name]
	if !registered {
		return nil, fmt.Errorf("no analyzer with name or type '%s' registered", name)
	}
	analyzer, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building analyzer: %w", err)
	}
	return analyzer, nil
}
// AnalyzerNamed returns the analyzer stored under name, using ItemNamed with
// AnalyzerBuild to create and store it when it is not cached yet. Any error
// from ItemNamed is returned unchanged.
func (c *AnalyzerCache) AnalyzerNamed(name string, cache *Cache) (analysis.Analyzer, error) {
	found, lookupErr := c.ItemNamed(name, cache, AnalyzerBuild)
	if lookupErr != nil {
		return nil, lookupErr
	}
	// The stored item is asserted to be an analysis.Analyzer without a check.
	analyzer := found.(analysis.Analyzer)
	return analyzer, nil
}
// DefineAnalyzer builds an analyzer of type typ from config via AnalyzerBuild
// and stores it under name using DefineItem. When DefineItem reports
// ErrAlreadyDefined, an error naming the analyzer is returned instead; every
// other error is passed through unchanged.
func (c *AnalyzerCache) DefineAnalyzer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Analyzer, error) {
	defined, err := c.DefineItem(name, typ, config, cache, AnalyzerBuild)
	switch {
	case err == nil:
		return defined.(analysis.Analyzer), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("analyzer named '%s' already defined", name)
	default:
		return nil, err
	}
}
// AnalyzerTypesAndInstances sorts every name in the analyzers table into one
// of two lists by calling its constructor with an empty config and a new
// Cache: names whose constructor fails are returned first, as types; names
// whose constructor succeeds are returned second, as instances. The order
// within each list follows the iteration order of the analyzers table.
func AnalyzerTypesAndInstances() ([]string, []string) {
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()

	var (
		types     []string
		instances []string
	)
	for name, construct := range analyzers {
		if _, err := construct(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/cache.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"sync"
)
// ErrAlreadyDefined is returned by ConcurrentCache.DefineItem when an item is
// already stored under the requested name.
var ErrAlreadyDefined = fmt.Errorf("item already defined")
// CacheBuild constructs a cache item. ConcurrentCache.ItemNamed calls it with
// the item name and a nil config; ConcurrentCache.DefineItem calls it with the
// type name and the supplied config.
type CacheBuild func(name string, config map[string]interface{}, cache *Cache) (interface{}, error)
// ConcurrentCache stores items by name behind a read/write mutex.
type ConcurrentCache struct {
// mutex is held while data is read or written.
mutex sync.RWMutex
// data holds the stored items keyed by name.
data map[string]interface{}
}
// NewConcurrentCache returns an empty ConcurrentCache whose item map is
// already allocated.
func NewConcurrentCache() *ConcurrentCache {
	c := new(ConcurrentCache)
	c.data = make(map[string]interface{})
	return c
}
// ItemNamed returns the item stored under name. When nothing is stored yet it
// calls build(name, nil, cache), stores the result under name and returns it.
// A build error is returned as-is and nothing is stored.
//
// build runs while no lock is held, so several callers may build the same
// name at once; the first one to store its result wins and the others return
// that stored item instead of their own.
func (c *ConcurrentCache) ItemNamed(name string, cache *Cache, build CacheBuild) (interface{}, error) {
	// Fast path: look the item up under the read lock only.
	c.mutex.RLock()
	existing, found := c.data[name]
	c.mutex.RUnlock()
	if found {
		return existing, nil
	}

	// Not there: build it without holding any lock.
	built, err := build(name, nil, cache)
	if err != nil {
		return nil, err
	}

	c.mutex.Lock()
	defer c.mutex.Unlock()

	// Re-check: another caller may have stored the item in the meantime.
	if winner, stored := c.data[name]; stored {
		return winner, nil
	}
	c.data[name] = built
	return built, nil
}
// DefineItem builds a new item with build(typ, config, cache) and stores it
// under name. It returns ErrAlreadyDefined when an item is already stored
// under name, either before the build or by the time the result would be
// stored. A build error is returned as-is and nothing is stored.
//
// build runs while no lock is held, so other lookups can proceed meanwhile.
func (c *ConcurrentCache) DefineItem(name string, typ string, config map[string]interface{}, cache *Cache, build CacheBuild) (interface{}, error) {
	// Cheap pre-check under the read lock.
	c.mutex.RLock()
	_, taken := c.data[name]
	c.mutex.RUnlock()
	if taken {
		return nil, ErrAlreadyDefined
	}

	// Really not there: build it without holding any lock.
	built, err := build(typ, config, cache)
	if err != nil {
		return nil, err
	}

	c.mutex.Lock()
	defer c.mutex.Unlock()

	// Re-check: the name may have been defined while no lock was held.
	if _, taken = c.data[name]; taken {
		return nil, ErrAlreadyDefined
	}
	c.data[name] = built
	return built, nil
}
================================================
FILE: registry/char_filter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterCharFilter stores constructor in the package-level charFilters
// table under name. It returns an error, leaving the existing entry
// untouched, when the name is already registered.
func RegisterCharFilter(name string, constructor CharFilterConstructor) error {
	if _, taken := charFilters[name]; taken {
		return fmt.Errorf("attempted to register duplicate char filter named '%s'", name)
	}
	charFilters[name] = constructor
	return nil
}
// CharFilterConstructor builds an analysis.CharFilter from a configuration
// map. The cache argument is supplied by the caller of the constructor.
type CharFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.CharFilter, error)
// CharFilterRegistry maps names to CharFilterConstructor values.
type CharFilterRegistry map[string]CharFilterConstructor
// CharFilterCache embeds a ConcurrentCache; its CharFilterNamed and
// DefineCharFilter methods use that cache to hold char filters built by
// CharFilterBuild.
type CharFilterCache struct {
*ConcurrentCache
}
// NewCharFilterCache returns a CharFilterCache backed by a freshly created
// ConcurrentCache.
func NewCharFilterCache() *CharFilterCache {
	return &CharFilterCache{ConcurrentCache: NewConcurrentCache()}
}
// CharFilterBuild looks up the constructor registered under name in the
// package-level charFilters table and invokes it with config and cache.
//
// It returns an error when no constructor is registered under name, or when
// the constructor itself fails. In the latter case the constructor's error is
// wrapped with %w, so the message is unchanged while the cause remains
// reachable through errors.Is / errors.As / errors.Unwrap.
func CharFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := charFilters[name]
	if !registered {
		return nil, fmt.Errorf("no char filter with name or type '%s' registered", name)
	}
	charFilter, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building char filter: %w", err)
	}
	return charFilter, nil
}
// CharFilterNamed returns the char filter stored under name, using ItemNamed
// with CharFilterBuild to create and store it when it is not cached yet. Any
// error from ItemNamed is returned unchanged.
func (c *CharFilterCache) CharFilterNamed(name string, cache *Cache) (analysis.CharFilter, error) {
	found, lookupErr := c.ItemNamed(name, cache, CharFilterBuild)
	if lookupErr != nil {
		return nil, lookupErr
	}
	// The stored item is asserted to be an analysis.CharFilter without a check.
	charFilter := found.(analysis.CharFilter)
	return charFilter, nil
}
// DefineCharFilter builds a char filter of type typ from config via
// CharFilterBuild and stores it under name using DefineItem. When DefineItem
// reports ErrAlreadyDefined, an error naming the char filter is returned
// instead; every other error is passed through unchanged.
func (c *CharFilterCache) DefineCharFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.CharFilter, error) {
	defined, err := c.DefineItem(name, typ, config, cache, CharFilterBuild)
	switch {
	case err == nil:
		return defined.(analysis.CharFilter), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("char filter named '%s' already defined", name)
	default:
		return nil, err
	}
}
// CharFilterTypesAndInstances sorts every name in the charFilters table into
// one of two lists by calling its constructor with an empty config and a new
// Cache: names whose constructor fails are returned first, as types; names
// whose constructor succeeds are returned second, as instances. The order
// within each list follows the iteration order of the charFilters table.
func CharFilterTypesAndInstances() ([]string, []string) {
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()

	var (
		types     []string
		instances []string
	)
	for name, construct := range charFilters {
		if _, err := construct(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/datetime_parser.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterDateTimeParser stores constructor in the package-level
// dateTimeParsers table under name. It returns an error, leaving the existing
// entry untouched, when the name is already registered.
func RegisterDateTimeParser(name string, constructor DateTimeParserConstructor) error {
	if _, taken := dateTimeParsers[name]; taken {
		return fmt.Errorf("attempted to register duplicate date time parser named '%s'", name)
	}
	dateTimeParsers[name] = constructor
	return nil
}
// DateTimeParserConstructor builds an analysis.DateTimeParser from a
// configuration map. The cache argument is supplied by the caller of the
// constructor.
type DateTimeParserConstructor func(config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error)
// DateTimeParserRegistry maps names to DateTimeParserConstructor values.
type DateTimeParserRegistry map[string]DateTimeParserConstructor
// DateTimeParserCache embeds a ConcurrentCache; its DateTimeParserNamed and
// DefineDateTimeParser methods use that cache to hold date time parsers built
// by DateTimeParserBuild.
type DateTimeParserCache struct {
*ConcurrentCache
}
// NewDateTimeParserCache returns a DateTimeParserCache backed by a freshly
// created ConcurrentCache.
func NewDateTimeParserCache() *DateTimeParserCache {
	return &DateTimeParserCache{ConcurrentCache: NewConcurrentCache()}
}
// DateTimeParserBuild looks up the constructor registered under name in the
// package-level dateTimeParsers table and invokes it with config and cache.
//
// It returns an error when no constructor is registered under name, or when
// the constructor itself fails. In the latter case the constructor's error is
// wrapped with %w, so the message is unchanged while the cause remains
// reachable through errors.Is / errors.As / errors.Unwrap.
func DateTimeParserBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := dateTimeParsers[name]
	if !registered {
		return nil, fmt.Errorf("no date time parser with name or type '%s' registered", name)
	}
	dateTimeParser, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building date time parser: %w", err)
	}
	return dateTimeParser, nil
}
// DateTimeParserNamed returns the date time parser stored under name, using
// ItemNamed with DateTimeParserBuild to create and store it when it is not
// cached yet. Any error from ItemNamed is returned unchanged.
func (c *DateTimeParserCache) DateTimeParserNamed(name string, cache *Cache) (analysis.DateTimeParser, error) {
	found, lookupErr := c.ItemNamed(name, cache, DateTimeParserBuild)
	if lookupErr != nil {
		return nil, lookupErr
	}
	// The stored item is asserted to be an analysis.DateTimeParser without a check.
	parser := found.(analysis.DateTimeParser)
	return parser, nil
}
// DefineDateTimeParser builds a date time parser of type typ from config via
// DateTimeParserBuild and stores it under name using DefineItem. When
// DefineItem reports ErrAlreadyDefined, an error naming the parser is
// returned instead; every other error is passed through unchanged.
func (c *DateTimeParserCache) DefineDateTimeParser(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.DateTimeParser, error) {
	defined, err := c.DefineItem(name, typ, config, cache, DateTimeParserBuild)
	switch {
	case err == nil:
		return defined.(analysis.DateTimeParser), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("date time parser named '%s' already defined", name)
	default:
		return nil, err
	}
}
// DateTimeParserTypesAndInstances sorts every name in the dateTimeParsers
// table into one of two lists by calling its constructor with an empty config
// and a new Cache: names whose constructor fails are returned first, as
// types; names whose constructor succeeds are returned second, as instances.
// The order within each list follows the iteration order of the
// dateTimeParsers table.
func DateTimeParserTypesAndInstances() ([]string, []string) {
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()

	var (
		types     []string
		instances []string
	)
	for name, construct := range dateTimeParsers {
		if _, err := construct(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/fragment_formatter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// RegisterFragmentFormatter stores constructor in the package-level
// fragmentFormatters table under name. It returns an error, leaving the
// existing entry untouched, when the name is already registered.
func RegisterFragmentFormatter(name string, constructor FragmentFormatterConstructor) error {
	if _, taken := fragmentFormatters[name]; taken {
		return fmt.Errorf("attempted to register duplicate fragment formatter named '%s'", name)
	}
	fragmentFormatters[name] = constructor
	return nil
}
// FragmentFormatterConstructor builds a highlight.FragmentFormatter from a
// configuration map. The cache argument is supplied by the caller of the
// constructor.
type FragmentFormatterConstructor func(config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error)
// FragmentFormatterRegistry maps names to FragmentFormatterConstructor values.
type FragmentFormatterRegistry map[string]FragmentFormatterConstructor
// FragmentFormatterCache embeds a ConcurrentCache; its FragmentFormatterNamed
// and DefineFragmentFormatter methods use that cache to hold fragment
// formatters built by FragmentFormatterBuild.
type FragmentFormatterCache struct {
*ConcurrentCache
}
// NewFragmentFormatterCache returns a FragmentFormatterCache backed by a new
// ConcurrentCache.
func NewFragmentFormatterCache() *FragmentFormatterCache {
	return &FragmentFormatterCache{ConcurrentCache: NewConcurrentCache()}
}
// FragmentFormatterBuild constructs a fragment formatter using the
// constructor registered under name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func FragmentFormatterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := fragmentFormatters[name]
	if !registered {
		return nil, fmt.Errorf("no fragment formatter with name or type '%s' registered", name)
	}
	fragmentFormatter, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %w", err)
	}
	return fragmentFormatter, nil
}
// FragmentFormatterNamed returns the fragment formatter called name, obtained
// through ItemNamed with FragmentFormatterBuild as the builder. Errors from
// ItemNamed are returned unchanged.
func (c *FragmentFormatterCache) FragmentFormatterNamed(name string, cache *Cache) (highlight.FragmentFormatter, error) {
	found, err := c.ItemNamed(name, cache, FragmentFormatterBuild)
	if err != nil {
		return nil, err
	}
	formatter := found.(highlight.FragmentFormatter)
	return formatter, nil
}
// DefineFragmentFormatter delegates to DefineItem to define a fragment
// formatter of type typ under name, using FragmentFormatterBuild as the
// builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending formatter; any other error is returned unchanged.
func (c *FragmentFormatterCache) DefineFragmentFormatter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.FragmentFormatter, error) {
	defined, err := c.DefineItem(name, typ, config, cache, FragmentFormatterBuild)
	switch {
	case err == nil:
		return defined.(highlight.FragmentFormatter), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("fragment formatter named '%s' already defined", name)
	default:
		return nil, err
	}
}
// FragmentFormatterTypesAndInstances splits the registered fragment formatter
// names into two lists by probing each constructor with an empty config and a
// fresh Cache: names whose constructor fails are returned first ("types"),
// names whose constructor succeeds are returned second ("instances").
// The order within each list follows map iteration and is unspecified.
func FragmentFormatterTypesAndInstances() ([]string, []string) {
	var (
		types     []string
		instances []string
	)
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()
	for name, build := range fragmentFormatters {
		if _, err := build(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/fragmenter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// RegisterFragmenter adds constructor to the package-level fragmenter
// registry under name. An existing entry is never overwritten; a duplicate
// name yields an error instead.
func RegisterFragmenter(name string, constructor FragmenterConstructor) error {
	if _, taken := fragmenters[name]; taken {
		return fmt.Errorf("attempted to register duplicate fragmenter named '%s'", name)
	}
	fragmenters[name] = constructor
	return nil
}
// FragmenterConstructor is the signature a fragmenter implementation
// registers: it receives a configuration map and a *Cache and returns a
// highlight.Fragmenter or an error.
type FragmenterConstructor func(config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error)
// FragmenterRegistry maps a registered name to its constructor.
type FragmenterRegistry map[string]FragmenterConstructor
// FragmenterCache is the per-Cache store of fragmenters; it embeds
// *ConcurrentCache and adds typed accessors on top of it.
type FragmenterCache struct {
*ConcurrentCache
}
// NewFragmenterCache returns a FragmenterCache backed by a new
// ConcurrentCache.
func NewFragmenterCache() *FragmenterCache {
	return &FragmenterCache{ConcurrentCache: NewConcurrentCache()}
}
// FragmenterBuild constructs a fragmenter using the constructor registered
// under name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func FragmenterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := fragmenters[name]
	if !registered {
		return nil, fmt.Errorf("no fragmenter with name or type '%s' registered", name)
	}
	fragmenter, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %w", err)
	}
	return fragmenter, nil
}
// FragmenterNamed returns the fragmenter called name, obtained through
// ItemNamed with FragmenterBuild as the builder. Errors from ItemNamed are
// returned unchanged.
func (c *FragmenterCache) FragmenterNamed(name string, cache *Cache) (highlight.Fragmenter, error) {
	found, err := c.ItemNamed(name, cache, FragmenterBuild)
	if err != nil {
		return nil, err
	}
	fragmenter := found.(highlight.Fragmenter)
	return fragmenter, nil
}
// DefineFragmenter delegates to DefineItem to define a fragmenter of type typ
// under name, using FragmenterBuild as the builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending fragmenter; any other error is returned unchanged.
func (c *FragmenterCache) DefineFragmenter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Fragmenter, error) {
	defined, err := c.DefineItem(name, typ, config, cache, FragmenterBuild)
	switch {
	case err == nil:
		return defined.(highlight.Fragmenter), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("fragmenter named '%s' already defined", name)
	default:
		return nil, err
	}
}
// FragmenterTypesAndInstances splits the registered fragmenter names into two
// lists by probing each constructor with an empty config and a fresh Cache:
// names whose constructor fails are returned first ("types"), names whose
// constructor succeeds are returned second ("instances").
// The order within each list follows map iteration and is unspecified.
func FragmenterTypesAndInstances() ([]string, []string) {
	var (
		types     []string
		instances []string
	)
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()
	for name, build := range fragmenters {
		if _, err := build(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/highlighter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// RegisterHighlighter adds constructor to the package-level highlighter
// registry under name. An existing entry is never overwritten; a duplicate
// name yields an error instead.
func RegisterHighlighter(name string, constructor HighlighterConstructor) error {
	if _, taken := highlighters[name]; taken {
		return fmt.Errorf("attempted to register duplicate highlighter named '%s'", name)
	}
	highlighters[name] = constructor
	return nil
}
// HighlighterConstructor is the signature a highlighter implementation
// registers: it receives a configuration map and a *Cache and returns a
// highlight.Highlighter or an error.
type HighlighterConstructor func(config map[string]interface{}, cache *Cache) (highlight.Highlighter, error)
// HighlighterRegistry maps a registered name to its constructor.
type HighlighterRegistry map[string]HighlighterConstructor
// HighlighterCache is the per-Cache store of highlighters; it embeds
// *ConcurrentCache and adds typed accessors on top of it.
type HighlighterCache struct {
*ConcurrentCache
}
// NewHighlighterCache returns a HighlighterCache backed by a new
// ConcurrentCache.
func NewHighlighterCache() *HighlighterCache {
	return &HighlighterCache{ConcurrentCache: NewConcurrentCache()}
}
// HighlighterBuild constructs a highlighter using the constructor registered
// under name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func HighlighterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := highlighters[name]
	if !registered {
		return nil, fmt.Errorf("no highlighter with name or type '%s' registered", name)
	}
	highlighter, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building highlighter: %w", err)
	}
	return highlighter, nil
}
// HighlighterNamed returns the highlighter called name, obtained through
// ItemNamed with HighlighterBuild as the builder. Errors from ItemNamed are
// returned unchanged.
func (c *HighlighterCache) HighlighterNamed(name string, cache *Cache) (highlight.Highlighter, error) {
	found, err := c.ItemNamed(name, cache, HighlighterBuild)
	if err != nil {
		return nil, err
	}
	highlighter := found.(highlight.Highlighter)
	return highlighter, nil
}
// DefineHighlighter delegates to DefineItem to define a highlighter of type
// typ under name, using HighlighterBuild as the builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending highlighter; any other error is returned unchanged.
func (c *HighlighterCache) DefineHighlighter(name string, typ string, config map[string]interface{}, cache *Cache) (highlight.Highlighter, error) {
	defined, err := c.DefineItem(name, typ, config, cache, HighlighterBuild)
	switch {
	case err == nil:
		return defined.(highlight.Highlighter), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("highlighter named '%s' already defined", name)
	default:
		return nil, err
	}
}
// HighlighterTypesAndInstances splits the registered highlighter names into
// two lists by probing each constructor with an empty config and a fresh
// Cache: names whose constructor fails are returned first ("types"), names
// whose constructor succeeds are returned second ("instances").
// The order within each list follows map iteration and is unspecified.
func HighlighterTypesAndInstances() ([]string, []string) {
	var (
		types     []string
		instances []string
	)
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()
	for name, build := range highlighters {
		if _, err := build(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/index_type.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
index "github.com/blevesearch/bleve_index_api"
)
// RegisterIndexType adds constructor to the package-level index type registry
// under name. An existing entry is never overwritten; a duplicate name yields
// an error instead.
func RegisterIndexType(name string, constructor IndexTypeConstructor) error {
	if _, taken := indexTypes[name]; taken {
		return fmt.Errorf("attempted to register duplicate index encoding named '%s'", name)
	}
	indexTypes[name] = constructor
	return nil
}
// IndexTypeConstructor is the signature an index implementation registers:
// it receives a store name, that store's configuration map and an analysis
// queue, and returns an index.Index or an error.
type IndexTypeConstructor func(storeName string, storeConfig map[string]interface{}, analysisQueue *index.AnalysisQueue) (index.Index, error)
// IndexTypeRegistry maps a registered index type name to its constructor.
type IndexTypeRegistry map[string]IndexTypeConstructor
// IndexTypeConstructorByName returns the constructor registered under name.
// The result is nil when no index type is registered under that name, since
// this is a plain map lookup.
func IndexTypeConstructorByName(name string) IndexTypeConstructor {
return indexTypes[name]
}
// IndexTypesAndInstances lists every registered index type name as a "type".
// Index types are never probed, so the second ("instances") result is always
// nil. The order of names follows map iteration and is unspecified.
func IndexTypesAndInstances() ([]string, []string) {
	var names []string
	for registered := range indexTypes {
		names = append(names, registered)
	}
	return names, nil
}
================================================
FILE: registry/nested.go
================================================
// Copyright (c) 2026 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"strings"
"sync"
"github.com/blevesearch/bleve/v2/search"
)
// NestedFieldCache caches nested field prefixes and their corresponding nesting levels.
// A nested field prefix is a field path prefix that indicates the start of a nested document.
// The nesting level indicates how deep the nested document is in the overall document structure.
//
// The map is populated at most once through InitOnce; the read methods take
// the read lock before consulting it.
type NestedFieldCache struct {
// nested prefix -> nested level
prefixDepth map[string]int
// once guards the one-time population of prefixDepth in InitOnce
once sync.Once
// m protects prefixDepth
m sync.RWMutex
}
// NewNestedFieldCache returns a zero-valued NestedFieldCache. It holds no
// prefixes until InitOnce is called.
func NewNestedFieldCache() *NestedFieldCache {
	return new(NestedFieldCache)
}
// InitOnce populates the prefix map with the result of buildFunc. Only the
// first call on a given cache has any effect; later calls return without
// invoking buildFunc. buildFunc runs while the write lock is held.
func (nfc *NestedFieldCache) InitOnce(buildFunc func() map[string]int) {
	populate := func() {
		nfc.m.Lock()
		defer nfc.m.Unlock()
		nfc.prefixDepth = buildFunc()
	}
	nfc.once.Do(populate)
}
// NestedDepth reports how deeply the given field paths are nested:
//   - common: the highest nesting level among cached prefixes that are a
//     prefix of every path in fieldPaths; 0 when no cached prefix is shared
//     by all paths.
//   - max: the highest nesting level among cached prefixes that are a prefix
//     of at least one path in fieldPaths; 0 when no path matches any prefix.
//
// Both results are 0 when fieldPaths is empty or nothing has been cached.
func (nfc *NestedFieldCache) NestedDepth(fieldPaths search.FieldSet) (common int, max int) {
	// nothing to match against
	if len(fieldPaths) == 0 {
		return 0, 0
	}

	nfc.m.RLock()
	defer nfc.m.RUnlock()

	// nothing cached, so nothing can match
	if len(nfc.prefixDepth) == 0 {
		return 0, 0
	}

	for prefix, level := range nfc.prefixDepth {
		// a prefix is only worth matching if it could raise one of the results
		raisesCommon := level > common
		raisesMax := level > max
		if !raisesCommon && !raisesMax {
			continue
		}

		matchAll, matchAny := nfc.prefixMatch(prefix, fieldPaths)
		if matchAll && raisesCommon {
			common = level
		}
		if matchAny && raisesMax {
			max = level
		}
	}
	return common, max
}
// CountNested reports how many nested prefixes are currently cached.
func (nfc *NestedFieldCache) CountNested() int {
	nfc.m.RLock()
	count := len(nfc.prefixDepth)
	nfc.m.RUnlock()
	return count
}
// IntersectsPrefix reports whether at least one of the given field paths
// starts with a cached nested prefix. It is false when fieldPaths is empty or
// no prefixes are cached.
func (nfc *NestedFieldCache) IntersectsPrefix(fieldPaths search.FieldSet) bool {
	if len(fieldPaths) == 0 {
		return false
	}

	nfc.m.RLock()
	defer nfc.m.RUnlock()

	// ranging over an empty (or nil) map runs zero iterations, which covers
	// the "nothing cached" case
	for prefix := range nfc.prefixDepth {
		if _, hit := nfc.prefixMatch(prefix, fieldPaths); hit {
			return true
		}
	}
	return false
}
// prefixMatch tests prefix against every path in fieldPaths using
// strings.HasPrefix and reports:
//   - common: true when no examined path lacks the prefix (true for an empty set)
//   - any: true when at least one path has the prefix
//
// Iteration stops as soon as both a matching and a non-matching path have
// been seen, because neither result can change after that.
// Caller must hold the read lock.
func (nfc *NestedFieldCache) prefixMatch(prefix string, fieldPaths search.FieldSet) (common bool, any bool) {
	matched, missed := false, false
	for path := range fieldPaths {
		if strings.HasPrefix(path, prefix) {
			matched = true
		} else {
			missed = true
		}
		if matched && missed {
			break
		}
	}
	return !missed, matched
}
================================================
FILE: registry/registry.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// Package-level registries, one per component kind. The registry types
// declared in this package (for example KVStoreRegistry) are maps from a
// registered name to a constructor.

// stores holds the registered KV store constructors.
var stores = make(KVStoreRegistry, 0)
// indexTypes holds the registered index type constructors.
var indexTypes = make(IndexTypeRegistry, 0)
// highlight
var fragmentFormatters = make(FragmentFormatterRegistry, 0)
var fragmenters = make(FragmenterRegistry, 0)
var highlighters = make(HighlighterRegistry, 0)
// analysis
var charFilters = make(CharFilterRegistry, 0)
var tokenizers = make(TokenizerRegistry, 0)
var tokenMaps = make(TokenMapRegistry, 0)
var tokenFilters = make(TokenFilterRegistry, 0)
var analyzers = make(AnalyzerRegistry, 0)
var dateTimeParsers = make(DateTimeParserRegistry, 0)
var synonymSources = make(SynonymSourceRegistry, 0)
// Cache groups one per-kind cache for every component kind handled by this
// package, plus the nested field prefix cache. NewCache fills in every field.
// The *Named and Define* methods on Cache forward to the matching field and
// pass the Cache itself along.
type Cache struct {
CharFilters *CharFilterCache
Tokenizers *TokenizerCache
TokenMaps *TokenMapCache
TokenFilters *TokenFilterCache
Analyzers *AnalyzerCache
DateTimeParsers *DateTimeParserCache
FragmentFormatters *FragmentFormatterCache
Fragmenters *FragmenterCache
Highlighters *HighlighterCache
SynonymSources *SynonymSourceCache
// NestedPrefixes maps nested field prefixes to their nesting level.
NestedPrefixes *NestedFieldCache
}
// NewCache returns a Cache in which every per-kind cache has been freshly
// created by its own constructor.
func NewCache() *Cache {
	c := new(Cache)
	c.CharFilters = NewCharFilterCache()
	c.Tokenizers = NewTokenizerCache()
	c.TokenMaps = NewTokenMapCache()
	c.TokenFilters = NewTokenFilterCache()
	c.Analyzers = NewAnalyzerCache()
	c.DateTimeParsers = NewDateTimeParserCache()
	c.FragmentFormatters = NewFragmentFormatterCache()
	c.Fragmenters = NewFragmenterCache()
	c.Highlighters = NewHighlighterCache()
	c.SynonymSources = NewSynonymSourceCache()
	c.NestedPrefixes = NewNestedFieldCache()
	return c
}
// typeFromConfig extracts the "type" entry from config.
// It returns an error when the key is absent or when its value is not a
// string; in the latter case the error reports the value's dynamic type.
func typeFromConfig(config map[string]interface{}) (string, error) {
	raw, present := config["type"]
	if !present {
		return "", fmt.Errorf("'type' property is not defined")
	}
	if name, isString := raw.(string); isString {
		return name, nil
	}
	return "", fmt.Errorf("'type' property must be a string, not %T", raw)
}
// CharFilterNamed returns the char filter called name by forwarding to the
// CharFilters cache, passing this Cache along.
func (c *Cache) CharFilterNamed(name string) (analysis.CharFilter, error) {
return c.CharFilters.CharFilterNamed(name, c)
}
// DefineCharFilter defines a char filter under name. The component type is
// read from config["type"]; a missing or non-string type is returned as an
// error before the CharFilters cache is consulted.
func (c *Cache) DefineCharFilter(name string, config map[string]interface{}) (analysis.CharFilter, error) {
	filterType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.CharFilters.DefineCharFilter(name, filterType, config, c)
}
// TokenizerNamed returns the tokenizer called name by forwarding to the
// Tokenizers cache, passing this Cache along.
func (c *Cache) TokenizerNamed(name string) (analysis.Tokenizer, error) {
return c.Tokenizers.TokenizerNamed(name, c)
}
// DefineTokenizer defines a tokenizer under name. The component type is read
// from config["type"]; when that fails, the returned error names the
// tokenizer and includes the underlying reason.
func (c *Cache) DefineTokenizer(name string, config map[string]interface{}) (analysis.Tokenizer, error) {
	tokenizerType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, fmt.Errorf("cannot resolve '%s' tokenizer type: %s", name, typeErr)
	}
	return c.Tokenizers.DefineTokenizer(name, tokenizerType, config, c)
}
// TokenMapNamed returns the token map called name by forwarding to the
// TokenMaps cache, passing this Cache along.
func (c *Cache) TokenMapNamed(name string) (analysis.TokenMap, error) {
return c.TokenMaps.TokenMapNamed(name, c)
}
// DefineTokenMap defines a token map under name. The component type is read
// from config["type"]; a missing or non-string type is returned as an error
// before the TokenMaps cache is consulted.
func (c *Cache) DefineTokenMap(name string, config map[string]interface{}) (analysis.TokenMap, error) {
	mapType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.TokenMaps.DefineTokenMap(name, mapType, config, c)
}
// TokenFilterNamed returns the token filter called name by forwarding to the
// TokenFilters cache, passing this Cache along.
func (c *Cache) TokenFilterNamed(name string) (analysis.TokenFilter, error) {
return c.TokenFilters.TokenFilterNamed(name, c)
}
// DefineTokenFilter defines a token filter under name. The component type is
// read from config["type"]; a missing or non-string type is returned as an
// error before the TokenFilters cache is consulted.
func (c *Cache) DefineTokenFilter(name string, config map[string]interface{}) (analysis.TokenFilter, error) {
	filterType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.TokenFilters.DefineTokenFilter(name, filterType, config, c)
}
// AnalyzerNamed returns the analyzer called name by forwarding to the
// Analyzers cache, passing this Cache along.
func (c *Cache) AnalyzerNamed(name string) (analysis.Analyzer, error) {
return c.Analyzers.AnalyzerNamed(name, c)
}
// DefineAnalyzer defines an analyzer under name. The component type is read
// from config["type"]; a missing or non-string type is returned as an error
// before the Analyzers cache is consulted.
func (c *Cache) DefineAnalyzer(name string, config map[string]interface{}) (analysis.Analyzer, error) {
	analyzerType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.Analyzers.DefineAnalyzer(name, analyzerType, config, c)
}
// DateTimeParserNamed returns the date time parser called name by forwarding
// to the DateTimeParsers cache, passing this Cache along.
func (c *Cache) DateTimeParserNamed(name string) (analysis.DateTimeParser, error) {
return c.DateTimeParsers.DateTimeParserNamed(name, c)
}
// DefineDateTimeParser defines a date time parser under name. The component
// type is read from config["type"]; a missing or non-string type is returned
// as an error before the DateTimeParsers cache is consulted.
func (c *Cache) DefineDateTimeParser(name string, config map[string]interface{}) (analysis.DateTimeParser, error) {
	parserType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.DateTimeParsers.DefineDateTimeParser(name, parserType, config, c)
}
// SynonymSourceNamed returns the synonym source called name by forwarding to
// the SynonymSources cache, passing this Cache along.
func (c *Cache) SynonymSourceNamed(name string) (analysis.SynonymSource, error) {
return c.SynonymSources.SynonymSourceNamed(name, c)
}
// DefineSynonymSource defines a synonym source under name. Unlike the other
// Define* methods on Cache it does not read config["type"]: the type passed
// on is always analysis.SynonymSourceType.
func (c *Cache) DefineSynonymSource(name string, config map[string]interface{}) (analysis.SynonymSource, error) {
return c.SynonymSources.DefineSynonymSource(name, analysis.SynonymSourceType, config, c)
}
// FragmentFormatterNamed returns the fragment formatter called name by
// forwarding to the FragmentFormatters cache, passing this Cache along.
func (c *Cache) FragmentFormatterNamed(name string) (highlight.FragmentFormatter, error) {
return c.FragmentFormatters.FragmentFormatterNamed(name, c)
}
// DefineFragmentFormatter defines a fragment formatter under name. The
// component type is read from config["type"]; a missing or non-string type is
// returned as an error before the FragmentFormatters cache is consulted.
func (c *Cache) DefineFragmentFormatter(name string, config map[string]interface{}) (highlight.FragmentFormatter, error) {
	formatterType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.FragmentFormatters.DefineFragmentFormatter(name, formatterType, config, c)
}
// FragmenterNamed returns the fragmenter called name by forwarding to the
// Fragmenters cache, passing this Cache along.
func (c *Cache) FragmenterNamed(name string) (highlight.Fragmenter, error) {
return c.Fragmenters.FragmenterNamed(name, c)
}
// DefineFragmenter defines a fragmenter under name. The component type is
// read from config["type"]; a missing or non-string type is returned as an
// error before the Fragmenters cache is consulted.
func (c *Cache) DefineFragmenter(name string, config map[string]interface{}) (highlight.Fragmenter, error) {
	fragmenterType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.Fragmenters.DefineFragmenter(name, fragmenterType, config, c)
}
// HighlighterNamed returns the highlighter called name by forwarding to the
// Highlighters cache, passing this Cache along.
func (c *Cache) HighlighterNamed(name string) (highlight.Highlighter, error) {
return c.Highlighters.HighlighterNamed(name, c)
}
// DefineHighlighter defines a highlighter under name. The component type is
// read from config["type"]; a missing or non-string type is returned as an
// error before the Highlighters cache is consulted.
func (c *Cache) DefineHighlighter(name string, config map[string]interface{}) (highlight.Highlighter, error) {
	highlighterType, typeErr := typeFromConfig(config)
	if typeErr != nil {
		return nil, typeErr
	}
	return c.Highlighters.DefineHighlighter(name, highlighterType, config, c)
}
================================================
FILE: registry/store.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
store "github.com/blevesearch/upsidedown_store_api"
)
// RegisterKVStore adds constructor to the package-level KV store registry
// under name. An existing entry is never overwritten; a duplicate name yields
// an error instead.
func RegisterKVStore(name string, constructor KVStoreConstructor) error {
	if _, taken := stores[name]; taken {
		return fmt.Errorf("attempted to register duplicate store named '%s'", name)
	}
	stores[name] = constructor
	return nil
}
// KVStoreConstructor is used to build a KVStore of a specific type when
// specified by the index configuration. In addition to meeting the
// store.KVStore interface, KVStores must also support this constructor.
// Note that currently the values of config must
// be able to be marshaled and unmarshaled using the encoding/json library (used
// when reading/writing the index metadata file).
type KVStoreConstructor func(mo store.MergeOperator, config map[string]interface{}) (store.KVStore, error)
// KVStoreRegistry maps a registered store name to its constructor.
type KVStoreRegistry map[string]KVStoreConstructor
// KVStoreConstructorByName returns the constructor registered under name.
// The result is nil when no store is registered under that name, since this
// is a plain map lookup.
func KVStoreConstructorByName(name string) KVStoreConstructor {
return stores[name]
}
// KVStoreTypesAndInstances lists every registered KV store name as a "type".
// Stores are never probed, so the second ("instances") result is always nil.
// The order of names follows map iteration and is unspecified.
func KVStoreTypesAndInstances() ([]string, []string) {
	var names []string
	for registered := range stores {
		names = append(names, registered)
	}
	return names, nil
}
================================================
FILE: registry/synonym_source.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterSynonymSource adds constructor to the package-level synonym source
// registry under typ. An existing entry is never overwritten; a duplicate
// type yields an error instead.
func RegisterSynonymSource(typ string, constructor SynonymSourceConstructor) error {
	if _, taken := synonymSources[typ]; taken {
		return fmt.Errorf("attempted to register duplicate synonym source with type '%s'", typ)
	}
	synonymSources[typ] = constructor
	return nil
}
// SynonymSourceCache is the per-Cache store of synonym sources; it embeds
// *ConcurrentCache and adds typed accessors on top of it.
type SynonymSourceCache struct {
*ConcurrentCache
}
// NewSynonymSourceCache returns a SynonymSourceCache backed by a new
// ConcurrentCache.
func NewSynonymSourceCache() *SynonymSourceCache {
	return &SynonymSourceCache{ConcurrentCache: NewConcurrentCache()}
}
// SynonymSourceConstructor is the signature a synonym source implementation
// registers: it receives a configuration map and a *Cache and returns an
// analysis.SynonymSource or an error.
type SynonymSourceConstructor func(config map[string]interface{}, cache *Cache) (analysis.SynonymSource, error)
// SynonymSourceRegistry maps a registered type to its constructor.
type SynonymSourceRegistry map[string]SynonymSourceConstructor
// SynonymSourceBuild constructs a synonym source using the constructor
// registered under name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func SynonymSourceBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := synonymSources[name]
	if !registered {
		return nil, fmt.Errorf("no synonym source with name '%s' registered", name)
	}
	synonymSource, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building synonym source: %w", err)
	}
	return synonymSource, nil
}
// SynonymSourceNamed returns the synonym source called name, obtained through
// ItemNamed with SynonymSourceBuild as the builder. Errors from ItemNamed are
// returned unchanged.
func (c *SynonymSourceCache) SynonymSourceNamed(name string, cache *Cache) (analysis.SynonymSource, error) {
	found, err := c.ItemNamed(name, cache, SynonymSourceBuild)
	if err != nil {
		return nil, err
	}
	source := found.(analysis.SynonymSource)
	return source, nil
}
// DefineSynonymSource delegates to DefineItem to define a synonym source of
// type typ under name, using SynonymSourceBuild as the builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending synonym source; any other error is returned unchanged.
func (c *SynonymSourceCache) DefineSynonymSource(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.SynonymSource, error) {
	defined, err := c.DefineItem(name, typ, config, cache, SynonymSourceBuild)
	switch {
	case err == nil:
		return defined.(analysis.SynonymSource), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("synonym source named '%s' already defined", name)
	default:
		return nil, err
	}
}
// VisitSynonymSources calls visitor once for every cached entry, passing the
// entry's key and its value asserted to analysis.SynonymSource. The read lock
// is held for the whole walk. The first error returned by visitor stops the
// walk and is returned; nil is returned when every call succeeds.
func (c *SynonymSourceCache) VisitSynonymSources(visitor analysis.SynonymSourceVisitor) error {
	c.mutex.RLock()
	defer c.mutex.RUnlock()
	for name, item := range c.data {
		source := item.(analysis.SynonymSource)
		if err := visitor(name, source); err != nil {
			return err
		}
	}
	return nil
}
================================================
FILE: registry/token_filter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterTokenFilter adds constructor to the package-level token filter
// registry under name. An existing entry is never overwritten; a duplicate
// name yields an error instead.
func RegisterTokenFilter(name string, constructor TokenFilterConstructor) error {
	if _, taken := tokenFilters[name]; taken {
		return fmt.Errorf("attempted to register duplicate token filter named '%s'", name)
	}
	tokenFilters[name] = constructor
	return nil
}
// TokenFilterConstructor is the signature a token filter implementation
// registers: it receives a configuration map and a *Cache and returns an
// analysis.TokenFilter or an error.
type TokenFilterConstructor func(config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error)
// TokenFilterRegistry maps a registered name to its constructor.
type TokenFilterRegistry map[string]TokenFilterConstructor
// TokenFilterCache is the per-Cache store of token filters; it embeds
// *ConcurrentCache and adds typed accessors on top of it.
type TokenFilterCache struct {
*ConcurrentCache
}
// NewTokenFilterCache returns a TokenFilterCache backed by a new
// ConcurrentCache.
func NewTokenFilterCache() *TokenFilterCache {
	return &TokenFilterCache{ConcurrentCache: NewConcurrentCache()}
}
// TokenFilterBuild constructs a token filter using the constructor registered
// under name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func TokenFilterBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := tokenFilters[name]
	if !registered {
		return nil, fmt.Errorf("no token filter with name or type '%s' registered", name)
	}
	tokenFilter, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building token filter: %w", err)
	}
	return tokenFilter, nil
}
// TokenFilterNamed returns the token filter called name, obtained through
// ItemNamed with TokenFilterBuild as the builder. Errors from ItemNamed are
// returned unchanged.
func (c *TokenFilterCache) TokenFilterNamed(name string, cache *Cache) (analysis.TokenFilter, error) {
	found, err := c.ItemNamed(name, cache, TokenFilterBuild)
	if err != nil {
		return nil, err
	}
	filter := found.(analysis.TokenFilter)
	return filter, nil
}
// DefineTokenFilter delegates to DefineItem to define a token filter of type
// typ under name, using TokenFilterBuild as the builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending token filter; any other error is returned unchanged.
func (c *TokenFilterCache) DefineTokenFilter(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenFilter, error) {
	defined, err := c.DefineItem(name, typ, config, cache, TokenFilterBuild)
	switch {
	case err == nil:
		return defined.(analysis.TokenFilter), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("token filter named '%s' already defined", name)
	default:
		return nil, err
	}
}
// TokenFilterTypesAndInstances splits the registered token filter names into
// two lists by probing each constructor with an empty config and a fresh
// Cache: names whose constructor fails are returned first ("types"), names
// whose constructor succeeds are returned second ("instances").
// The order within each list follows map iteration and is unspecified.
func TokenFilterTypesAndInstances() ([]string, []string) {
	var (
		types     []string
		instances []string
	)
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()
	for name, build := range tokenFilters {
		if _, err := build(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/token_maps.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterTokenMap adds constructor to the package-level token map registry
// under name. An existing entry is never overwritten; a duplicate name yields
// an error instead.
func RegisterTokenMap(name string, constructor TokenMapConstructor) error {
	if _, taken := tokenMaps[name]; taken {
		return fmt.Errorf("attempted to register duplicate token map named '%s'", name)
	}
	tokenMaps[name] = constructor
	return nil
}
// TokenMapConstructor is the signature a token map implementation registers:
// it receives a configuration map and a *Cache and returns an
// analysis.TokenMap or an error.
type TokenMapConstructor func(config map[string]interface{}, cache *Cache) (analysis.TokenMap, error)
// TokenMapRegistry maps a registered name to its constructor.
type TokenMapRegistry map[string]TokenMapConstructor
// TokenMapCache is the per-Cache store of token maps; it embeds
// *ConcurrentCache and adds typed accessors on top of it.
type TokenMapCache struct {
*ConcurrentCache
}
// NewTokenMapCache returns a TokenMapCache backed by a new ConcurrentCache.
func NewTokenMapCache() *TokenMapCache {
	return &TokenMapCache{ConcurrentCache: NewConcurrentCache()}
}
// TokenMapBuild constructs a token map using the constructor registered under
// name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func TokenMapBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := tokenMaps[name]
	if !registered {
		return nil, fmt.Errorf("no token map with name or type '%s' registered", name)
	}
	tokenMap, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building token map: %w", err)
	}
	return tokenMap, nil
}
// TokenMapNamed returns the token map called name, obtained through ItemNamed
// with TokenMapBuild as the builder. Errors from ItemNamed are returned
// unchanged.
func (c *TokenMapCache) TokenMapNamed(name string, cache *Cache) (analysis.TokenMap, error) {
	found, err := c.ItemNamed(name, cache, TokenMapBuild)
	if err != nil {
		return nil, err
	}
	tokenMap := found.(analysis.TokenMap)
	return tokenMap, nil
}
// DefineTokenMap delegates to DefineItem to define a token map of type typ
// under name, using TokenMapBuild as the builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending token map; any other error is returned unchanged.
func (c *TokenMapCache) DefineTokenMap(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.TokenMap, error) {
	defined, err := c.DefineItem(name, typ, config, cache, TokenMapBuild)
	switch {
	case err == nil:
		return defined.(analysis.TokenMap), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("token map named '%s' already defined", name)
	default:
		return nil, err
	}
}
// TokenMapTypesAndInstances splits the registered token map names into two
// lists by probing each constructor with an empty config and a fresh Cache:
// names whose constructor fails are returned first ("types"), names whose
// constructor succeeds are returned second ("instances").
// The order within each list follows map iteration and is unspecified.
func TokenMapTypesAndInstances() ([]string, []string) {
	var (
		types     []string
		instances []string
	)
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()
	for name, build := range tokenMaps {
		if _, err := build(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: registry/tokenizer.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package registry
import (
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
)
// RegisterTokenizer adds constructor to the package-level tokenizer registry
// under name. An existing entry is never overwritten; a duplicate name yields
// an error instead.
func RegisterTokenizer(name string, constructor TokenizerConstructor) error {
	if _, taken := tokenizers[name]; taken {
		return fmt.Errorf("attempted to register duplicate tokenizer named '%s'", name)
	}
	tokenizers[name] = constructor
	return nil
}
// TokenizerConstructor is the signature a tokenizer implementation registers:
// it receives a configuration map and a *Cache and returns an
// analysis.Tokenizer or an error.
type TokenizerConstructor func(config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error)
// TokenizerRegistry maps a registered name to its constructor.
type TokenizerRegistry map[string]TokenizerConstructor
// TokenizerCache is the per-Cache store of tokenizers; it embeds
// *ConcurrentCache and adds typed accessors on top of it.
type TokenizerCache struct {
*ConcurrentCache
}
// NewTokenizerCache returns a TokenizerCache backed by a new ConcurrentCache.
func NewTokenizerCache() *TokenizerCache {
	return &TokenizerCache{ConcurrentCache: NewConcurrentCache()}
}
// TokenizerBuild constructs a tokenizer using the constructor registered
// under name, passing config and cache through to it.
//
// It returns an error when nothing is registered under name, or when the
// constructor fails. In the latter case the constructor's error is wrapped
// with %w so callers can still inspect it with errors.Is / errors.As; the
// rendered message is the same as before.
func TokenizerBuild(name string, config map[string]interface{}, cache *Cache) (interface{}, error) {
	cons, registered := tokenizers[name]
	if !registered {
		return nil, fmt.Errorf("no tokenizer with name or type '%s' registered", name)
	}
	tokenizer, err := cons(config, cache)
	if err != nil {
		return nil, fmt.Errorf("error building tokenizer: %w", err)
	}
	return tokenizer, nil
}
// TokenizerNamed returns the tokenizer called name, obtained through
// ItemNamed with TokenizerBuild as the builder. Errors from ItemNamed are
// returned unchanged.
func (c *TokenizerCache) TokenizerNamed(name string, cache *Cache) (analysis.Tokenizer, error) {
	found, err := c.ItemNamed(name, cache, TokenizerBuild)
	if err != nil {
		return nil, err
	}
	tokenizer := found.(analysis.Tokenizer)
	return tokenizer, nil
}
// DefineTokenizer delegates to DefineItem to define a tokenizer of type typ
// under name, using TokenizerBuild as the builder.
//
// When DefineItem reports ErrAlreadyDefined, the error is replaced by one that
// names the offending tokenizer; any other error is returned unchanged.
func (c *TokenizerCache) DefineTokenizer(name string, typ string, config map[string]interface{}, cache *Cache) (analysis.Tokenizer, error) {
	defined, err := c.DefineItem(name, typ, config, cache, TokenizerBuild)
	switch {
	case err == nil:
		return defined.(analysis.Tokenizer), nil
	case err == ErrAlreadyDefined:
		return nil, fmt.Errorf("tokenizer named '%s' already defined", name)
	default:
		return nil, err
	}
}
// TokenizerTypesAndInstances splits the registered tokenizer names into two
// lists by probing each constructor with an empty config and a fresh Cache:
// names whose constructor fails are returned first ("types"), names whose
// constructor succeeds are returned second ("instances").
// The order within each list follows map iteration and is unspecified.
func TokenizerTypesAndInstances() ([]string, []string) {
	var (
		types     []string
		instances []string
	)
	probeConfig := map[string]interface{}{}
	probeCache := NewCache()
	for name, build := range tokenizers {
		if _, err := build(probeConfig, probeCache); err != nil {
			types = append(types, name)
			continue
		}
		instances = append(instances, name)
	}
	return types, instances
}
================================================
FILE: rescorer.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"github.com/blevesearch/bleve/v2/fusion"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
)
// DefaultScoreRankConstant is the default rank constant for score fusion.
// NOTE(review): presumably the default for Params.ScoreRankConstant used by
// reciprocal rank fusion — confirm against NewDefaultParams.
const DefaultScoreRankConstant = 60
// rescorer fuses full-text and KNN results for a hybrid search request using
// the algorithms in the fusion package. It temporarily rewrites the request
// (see prepareSearchRequest) and remembers the original values so that
// restoreSearchRequest can put them back.
type rescorer struct {
	// req is the request being rescored; it is mutated in place.
	req *SearchRequest

	// Paging values the request carried before prepareSearchRequest ran.
	origFrom, origSize int

	// origBoosts[0] is the top-level boost of req.Query (1.0 when the query is
	// not boostable); the remaining slots are reserved for the KNN queries.
	origBoosts []float64

	// restored makes restoreSearchRequest a no-op after its first call, which
	// keeps a deferred restore from running twice.
	restored bool
}
// prepareSearchRequest records the request's original paging and boost values
// on the rescorer and then rewrites the request for fusion:
//   - Params is filled with defaults (from From/Size) when it is nil
//   - From becomes 0 and Size becomes Params.ScoreWindowSize
//   - the top-level query boost is reset to 1.0 when the query is boostable
//   - the KNN queries are adjusted by prepareKnnRequest
//
// It always returns nil.
func (r *rescorer) prepareSearchRequest() error {
	req := r.req
	if req.Params == nil {
		req.Params = NewDefaultParams(req.From, req.Size)
	}

	r.origFrom, r.origSize = req.From, req.Size
	req.From, req.Size = 0, req.Params.ScoreWindowSize

	// Slot 0 belongs to req.Query; the KNN queries follow it.
	r.origBoosts = make([]float64, numKNNQueries(req)+1)
	r.origBoosts[0] = 1.0
	if boostable, ok := req.Query.(query.BoostableQuery); ok {
		// Only boostable queries are touched; anything else keeps the 1.0 above.
		r.origBoosts[0] = boostable.Boost()
		boostable.SetBoost(1.0)
	}

	r.prepareKnnRequest()
	return nil
}
// restoreSearchRequest undoes prepareSearchRequest: it puts back the original
// From, Size and top-level query boost, then lets restoreKnnRequest handle the
// KNN queries. Only the first call has any effect.
func (r *rescorer) restoreSearchRequest() {
	if !r.restored {
		r.restored = true

		r.req.From, r.req.Size = r.origFrom, r.origSize
		if boostable, ok := r.req.Query.(query.BoostableQuery); ok {
			boostable.SetBoost(r.origBoosts[0])
		}
		r.restoreKnnRequest()
	}
}
// rescore merges the full-text and KNN hits and runs the fusion algorithm
// selected by req.Score (ScoreRRF or ScoreRSF) over them, using the boosts
// captured by prepareSearchRequest as weights.
//
// It returns the fused hits, the total and the maximum score reported by the
// fusion step. If no fusion result is produced (req.Score is neither ScoreRRF
// nor ScoreRSF), the merged hits are returned unfused together with their
// count and highest score, instead of dereferencing a nil result.
func (r *rescorer) rescore(ftsHits, knnHits search.DocumentMatchCollection) (search.DocumentMatchCollection, uint64, float64) {
	mergedHits := r.mergeDocs(ftsHits, knnHits)

	var fusionResult *fusion.FusionResult

	switch r.req.Score {
	case ScoreRRF:
		fusionResult = fusion.ReciprocalRankFusion(
			mergedHits,
			r.origBoosts,
			r.req.Params.ScoreRankConstant,
			r.req.Params.ScoreWindowSize,
			numKNNQueries(r.req),
			r.req.Explain,
		)
	case ScoreRSF:
		fusionResult = fusion.RelativeScoreFusion(
			mergedHits,
			r.origBoosts,
			r.req.Params.ScoreWindowSize,
			numKNNQueries(r.req),
			r.req.Explain,
		)
	}

	if fusionResult == nil {
		// No fusion ran: fall back to the merged hits rather than panicking.
		var maxScore float64
		for _, hit := range mergedHits {
			if hit.Score > maxScore {
				maxScore = hit.Score
			}
		}
		return mergedHits, uint64(len(mergedHits)), maxScore
	}

	return fusionResult.Hits, fusionResult.Total, fusionResult.MaxScore
}
// mergeDocs folds the KNN hits into the FTS hits and returns the combined
// collection (ftsHits is returned untouched when there are no KNN hits).
//
//   - A document present in both keeps its FTS hit, which takes over the KNN
//     hit's ScoreBreakdown. With Explain set, its explanation becomes a new
//     node whose children are the FTS explanation followed by the KNN
//     explanation's children.
//   - A document present only in knnHits is appended with Score reset to 0.
//     With Explain set, its explanation is re-rooted with a nil first child
//     standing in for the missing FTS explanation.
//
// KNN-only hits are appended in the order they appear in knnHits, so the
// result does not depend on map iteration order.
func (r *rescorer) mergeDocs(ftsHits, knnHits search.DocumentMatchCollection) search.DocumentMatchCollection {
	if len(knnHits) == 0 {
		return ftsHits
	}

	knnHitMap := make(map[string]*search.DocumentMatch, len(knnHits))
	for _, hit := range knnHits {
		knnHitMap[hit.ID] = hit
	}

	for _, hit := range ftsHits {
		knnHit, ok := knnHitMap[hit.ID]
		if !ok {
			continue
		}
		hit.ScoreBreakdown = knnHit.ScoreBreakdown
		if r.req.Explain {
			hit.Expl = &search.Explanation{Value: 0.0, Message: "", Children: append([]*search.Explanation{hit.Expl}, knnHit.Expl.Children...)}
		}
		delete(knnHitMap, hit.ID)
	}

	// Whatever is still in the map had no FTS counterpart. Walk knnHits (not
	// the map) to get a stable order; the pointer comparison keeps exactly the
	// entry the map holds for an ID, and the delete ensures it is added once.
	for _, hit := range knnHits {
		if pending, ok := knnHitMap[hit.ID]; !ok || pending != hit {
			continue
		}
		delete(knnHitMap, hit.ID)

		hit.Score = 0
		ftsHits = append(ftsHits, hit)
		if r.req.Explain {
			hit.Expl = &search.Explanation{Value: 0.0, Message: "", Children: append([]*search.Explanation{nil}, hit.Expl.Children...)}
		}
	}

	return ftsHits
}
// newRescorer returns a rescorer bound to req. The request is not modified
// until prepareSearchRequest is called.
func newRescorer(req *SearchRequest) *rescorer {
	r := new(rescorer)
	r.req = req
	return r
}
================================================
FILE: rescorer_knn_test.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package bleve
import (
"context"
"math"
"testing"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
)
// createHybridSearchIndex creates a new index at path for the hybrid-search
// tests. The default mapping is disabled and a "_default" document mapping is
// registered with two fields:
//   - "color": text, "en" analyzer, stored, indexed, doc values, included in _all
//   - "colorvect_l2": 3-dimensional vector, Euclidean distance, optimized for recall
func createHybridSearchIndex(path string) (Index, error) {
	im := NewIndexMapping()
	// Turn the built-in default mapping off; only the mapping below applies.
	im.DefaultMapping.Enabled = false
	im.DefaultMapping.Dynamic = false

	dm := NewDocumentMapping()
	dm.Enabled = true
	dm.Dynamic = false

	// Text field holding the color name.
	colorField := NewTextFieldMapping()
	colorField.Analyzer = "en"
	colorField.DocValues = true
	colorField.IncludeInAll = true
	colorField.Store = true
	colorField.Index = true
	dm.AddFieldMappingsAt("color", colorField)

	// Vector field holding the RGB triple, compared by L2 distance.
	vectorField := mapping.NewVectorFieldMapping()
	vectorField.Dims = 3
	vectorField.Similarity = index.EuclideanDistance
	vectorField.VectorIndexOptimizedFor = "recall"
	dm.AddFieldMappingsAt("colorvect_l2", vectorField)

	im.AddDocumentMapping("_default", dm)

	return New(path, im)
}
// getHybridSearchDocuments returns the fixture documents for the hybrid-search
// tests: 22 shades of blue, each with a "color" name and a "colorvect_l2"
// []float32 holding its RGB components. The order is fixed.
func getHybridSearchDocuments() []map[string]interface{} {
	palette := []struct {
		name    string
		r, g, b float32
	}{
		{"dark slate blue", 72, 61, 139},
		{"blue", 0, 0, 255},
		{"navy", 0, 0, 128},
		{"steel blue", 70, 130, 180},
		{"light blue", 173, 216, 230},
		{"deep sky blue", 0, 191, 255},
		{"royal blue", 65, 105, 225},
		{"powder blue", 176, 224, 230},
		{"corn flower blue", 100, 149, 237},
		{"alice blue", 240, 248, 255},
		{"blue violet", 138, 43, 226},
		{"sky blue", 135, 206, 235},
		{"indigo", 75, 0, 130},
		{"midnight blue", 25, 25, 112},
		{"dark blue", 0, 0, 139},
		{"medium slate blue", 123, 104, 238},
		{"cadet blue", 95, 158, 160},
		{"light steel blue", 176, 196, 222},
		{"dodger blue", 30, 144, 255},
		{"medium blue", 0, 0, 205},
		{"slate blue", 106, 90, 205},
		{"light sky blue", 135, 206, 250},
	}

	documents := make([]map[string]interface{}, 0, len(palette))
	for _, c := range palette {
		documents = append(documents, map[string]interface{}{
			"color":        c.name,
			"colorvect_l2": []float32{c.r, c.g, c.b},
		})
	}
	return documents
}
// createScoreFusionRequest builds the search request shared by the fusion
// tests: a match-phrase query for "dark", Size 10, Explain off, request
// params RequestParams{1, 10}, and Score set to scoreMethod. When knn is
// true, two KNN clauses on "colorvect_l2" (k=5, boost 1.0) are added.
func createScoreFusionRequest(scoreMethod string, knn bool) *SearchRequest {
	req := NewSearchRequest(query.NewMatchPhraseQuery("dark"))

	if knn {
		// Close to navy / dark blue.
		req.AddKNN("colorvect_l2", []float32{0, 0, 129}, 5, 1.0)
		// Close to pure blue.
		req.AddKNN("colorvect_l2", []float32{0, 0, 250}, 5, 1.0)
	}

	req.AddParams(RequestParams{1, 10})
	req.Size = 10
	req.Score = scoreMethod
	req.Explain = false
	return req
}
// verifyRRFResults checks hits against a hand-computed RRF ranking for the
// request built by createScoreFusionRequest(ScoreRRF, true).
//
// With rank constant k=1, each list contributes 1/(1+rank):
//
//	FTS "dark":      1. dark blue, 2. dark slate blue
//	kNN1 [0,0,129]:  1. navy, 2. dark blue, 3. midnight blue, 4. indigo, 5. medium blue
//	kNN2 [0,0,250]:  1. blue, 2. medium blue, 3. dark blue, 4. navy, 5. royal blue
//
// "blue" and "medium blue" tie at 0.5, so either may show up where another ID
// was expected without a position error. Scores are compared with a 0.001
// tolerance.
func verifyRRFResults(t *testing.T, hits search.DocumentMatchCollection) {
	want := []struct {
		id    string
		score float64
	}{
		{"dark blue", 1.083333},       // FTS(1): 1/2 + kNN1(2): 1/3 + kNN2(3): 1/4
		{"navy", 0.7},                 // kNN1(1): 1/2 + kNN2(4): 1/5
		{"blue", 0.5},                 // kNN2(1): 1/2
		{"medium blue", 0.5},          // kNN1(5): 1/6 + kNN2(2): 1/3
		{"dark slate blue", 0.333333}, // FTS(2): 1/3
		{"midnight blue", 0.25},       // kNN1(3): 1/4
		{"indigo", 0.2},               // kNN1(4): 1/5
		{"royal blue", 0.166667},      // kNN2(5): 1/6
	}

	if len(hits) < len(want) {
		t.Fatalf("Expected at least %d results, got %d", len(want), len(hits))
	}

	const tolerance = 0.001
	for i, w := range want {
		got := hits[i]

		// The two tied documents are exempt from the position check.
		if got.ID != w.id && got.ID != "blue" && got.ID != "medium blue" {
			t.Errorf("Position %d: expected %s, got %s", i+1, w.id, got.ID)
		}

		if diff := math.Abs(got.Score - w.score); diff > tolerance {
			t.Errorf("Score for %s: expected %.6f, got %.6f (diff: %.6f)",
				w.id, w.score, got.Score, diff)
		}
	}
}
// setupSingleIndex creates a hybrid-search index in a temporary path and
// indexes every fixture document, using the color name as the document ID.
//
// It returns the index and a cleanup function that closes it and removes the
// temporary path. A failing Close is reported with t.Error rather than
// t.Fatal so that the temporary path is still removed.
func setupSingleIndex(t *testing.T) (Index, func()) {
	tmpIndexPath := createTmpIndexPath(t)
	idx, err := createHybridSearchIndex(tmpIndexPath)
	if err != nil {
		t.Fatal(err)
	}

	batch := idx.NewBatch()
	for _, doc := range getHybridSearchDocuments() {
		if err := batch.Index(doc["color"].(string), doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	cleanup := func() {
		if err := idx.Close(); err != nil {
			t.Error(err)
		}
		cleanupTmpIndexPath(t, tmpIndexPath)
	}
	return idx, cleanup
}
// setupAliasWithSingleIndex creates one hybrid-search index, wraps it in an
// index alias and indexes every fixture document through the alias, using the
// color name as the document ID.
//
// It returns the alias and a cleanup function that closes the underlying
// index and removes its temporary path. A failing Close is reported with
// t.Error rather than t.Fatal so that the temporary path is still removed.
func setupAliasWithSingleIndex(t *testing.T) (Index, func()) {
	tmpIndexPath := createTmpIndexPath(t)
	idx, err := createHybridSearchIndex(tmpIndexPath)
	if err != nil {
		t.Fatal(err)
	}

	alias := NewIndexAlias()
	alias.Add(idx)

	batch := alias.NewBatch()
	for _, doc := range getHybridSearchDocuments() {
		if err := batch.Index(doc["color"].(string), doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := alias.Batch(batch); err != nil {
		t.Fatal(err)
	}

	cleanup := func() {
		if err := idx.Close(); err != nil {
			t.Error(err)
		}
		cleanupTmpIndexPath(t, tmpIndexPath)
	}
	return alias, cleanup
}
// setupAliasWithTwoIndexes splits the fixture documents in half, indexes each
// half into its own hybrid-search index, and returns an alias over both.
//
// The returned cleanup closes both indexes and removes both temporary paths.
// Close failures are reported with t.Error rather than t.Fatal so that one
// failing Close cannot leave the other index open or the paths behind.
func setupAliasWithTwoIndexes(t *testing.T) (Index, func()) {
	documents := getHybridSearchDocuments()
	midpoint := len(documents) / 2

	// build creates an index in a fresh temporary path and loads docs into it.
	build := func(docs []map[string]interface{}) (Index, string) {
		path := createTmpIndexPath(t)
		idx, err := createHybridSearchIndex(path)
		if err != nil {
			t.Fatal(err)
		}
		batch := idx.NewBatch()
		for _, doc := range docs {
			if err := batch.Index(doc["color"].(string), doc); err != nil {
				t.Fatal(err)
			}
		}
		if err := idx.Batch(batch); err != nil {
			t.Fatal(err)
		}
		return idx, path
	}

	index1, tmpIndexPath1 := build(documents[:midpoint])
	index2, tmpIndexPath2 := build(documents[midpoint:])

	alias := NewIndexAlias()
	alias.Add(index1, index2)

	cleanup := func() {
		if err := index1.Close(); err != nil {
			t.Error(err)
		}
		if err := index2.Close(); err != nil {
			t.Error(err)
		}
		cleanupTmpIndexPath(t, tmpIndexPath1)
		cleanupTmpIndexPath(t, tmpIndexPath2)
	}
	return alias, cleanup
}
// setupNestedAliases splits the fixture documents into three groups, indexes
// each group into its own hybrid-search index, and arranges them as
//
//	masterAlias
//	├── subAlias1 → index1
//	└── subAlias2 → index2, index3
//
// It returns masterAlias and a cleanup that closes all three indexes and
// removes their temporary paths. Close failures are reported with t.Error
// rather than t.Fatal so that one failing Close cannot skip the rest of the
// teardown.
func setupNestedAliases(t *testing.T) (Index, func()) {
	documents := getHybridSearchDocuments()
	thirdPoint1 := len(documents) / 3
	thirdPoint2 := 2 * len(documents) / 3

	// build creates an index in a fresh temporary path and loads docs into it.
	build := func(docs []map[string]interface{}) (Index, string) {
		path := createTmpIndexPath(t)
		idx, err := createHybridSearchIndex(path)
		if err != nil {
			t.Fatal(err)
		}
		batch := idx.NewBatch()
		for _, doc := range docs {
			if err := batch.Index(doc["color"].(string), doc); err != nil {
				t.Fatal(err)
			}
		}
		if err := idx.Batch(batch); err != nil {
			t.Fatal(err)
		}
		return idx, path
	}

	index1, tmpIndexPath1 := build(documents[:thirdPoint1])
	index2, tmpIndexPath2 := build(documents[thirdPoint1:thirdPoint2])
	index3, tmpIndexPath3 := build(documents[thirdPoint2:])

	// First sub-alias holds one index.
	subAlias1 := NewIndexAlias()
	subAlias1.SetName("subAlias1")
	subAlias1.Add(index1)

	// Second sub-alias holds the other two.
	subAlias2 := NewIndexAlias()
	subAlias2.SetName("subAlias2")
	subAlias2.Add(index2, index3)

	masterAlias := NewIndexAlias()
	masterAlias.SetName("masterAlias")
	masterAlias.Add(subAlias1, subAlias2)

	cleanup := func() {
		if err := index1.Close(); err != nil {
			t.Error(err)
		}
		if err := index2.Close(); err != nil {
			t.Error(err)
		}
		if err := index3.Close(); err != nil {
			t.Error(err)
		}
		cleanupTmpIndexPath(t, tmpIndexPath1)
		cleanupTmpIndexPath(t, tmpIndexPath2)
		cleanupTmpIndexPath(t, tmpIndexPath3)
	}
	return masterAlias, cleanup
}
// TestRRFEndToEnd runs the hybrid RRF request against a single index and
// checks the hits against the hand-computed ranking.
func TestRRFEndToEnd(t *testing.T) {
	idx, teardown := setupSingleIndex(t)
	defer teardown()

	res, err := idx.Search(createScoreFusionRequest(ScoreRRF, true))
	if err != nil {
		t.Fatal(err)
	}

	verifyRRFResults(t, res.Hits)
}
// TestRRFAliasWithSingleIndex runs the hybrid RRF request through an alias
// over one index; the expected ranking is the same as for a direct search.
func TestRRFAliasWithSingleIndex(t *testing.T) {
	target, teardown := setupAliasWithSingleIndex(t)
	defer teardown()

	res, err := target.Search(createScoreFusionRequest(ScoreRRF, true))
	if err != nil {
		t.Fatal(err)
	}

	verifyRRFResults(t, res.Hits)
}
// TestRRFAliasWithTwoIndexes runs the hybrid RRF request through an alias
// whose documents are split over two indexes; the expected ranking is the
// same as for a single index.
func TestRRFAliasWithTwoIndexes(t *testing.T) {
	target, teardown := setupAliasWithTwoIndexes(t)
	defer teardown()

	res, err := target.Search(createScoreFusionRequest(ScoreRRF, true))
	if err != nil {
		t.Fatal(err)
	}

	verifyRRFResults(t, res.Hits)
}
// TestRRFNestedAliases runs the hybrid RRF request through a master alias
// made of two sub-aliases; the expected ranking is the same as for a single
// index.
func TestRRFNestedAliases(t *testing.T) {
	target, teardown := setupNestedAliases(t)
	defer teardown()

	res, err := target.Search(createScoreFusionRequest(ScoreRRF, true))
	if err != nil {
		t.Fatal(err)
	}

	verifyRRFResults(t, res.Hits)
}
// TestRRFPagination fetches the RRF results as two pages of five (From 0 and
// From 5) for every index/alias layout and checks that the pages hold 5 and 3
// hits and that, concatenated, they match the hand-computed RRF ranking.
func TestRRFPagination(t *testing.T) {
	scenarios := []struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}{
		{name: "SingleIndex", setup: setupSingleIndex},
		{name: "AliasWithSingleIndex", setup: setupAliasWithSingleIndex},
		{name: "AliasWithTwoIndexes", setup: setupAliasWithTwoIndexes},
		{name: "NestedAliases", setup: setupNestedAliases},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			idx, teardown := scenario.setup(t)
			defer teardown()

			// fetch runs the RRF request for the five-hit page starting at from.
			fetch := func(from int) search.DocumentMatchCollection {
				req := createScoreFusionRequest(ScoreRRF, true)
				req.From = from
				req.Size = 5
				res, err := idx.Search(req)
				if err != nil {
					t.Fatal(err)
				}
				return res.Hits
			}

			firstPage := fetch(0)
			secondPage := fetch(5)

			combined := make(search.DocumentMatchCollection, 0, len(firstPage)+len(secondPage))
			combined = append(combined, firstPage...)
			combined = append(combined, secondPage...)

			if n := len(firstPage); n != 5 {
				t.Errorf("Expected 5 results in first page, got %d", n)
			}
			if n := len(secondPage); n != 3 {
				t.Errorf("Expected 3 results in second page, got %d", n)
			}

			verifyRRFResults(t, combined)
		})
	}
}
// TestRRFFaceting checks, for every index/alias layout, that the "color"
// facet of a hybrid request scored with RRF equals the "color" facet of the
// text-only request with default scoring: same total, missing and other
// counts, and the same terms with the same counts in the same order.
func TestRRFFaceting(t *testing.T) {
	scenarios := []struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}{
		{name: "SingleIndex", setup: setupSingleIndex},
		{name: "AliasWithSingleIndex", setup: setupAliasWithSingleIndex},
		{name: "AliasWithTwoIndexes", setup: setupAliasWithTwoIndexes},
		{name: "NestedAliases", setup: setupNestedAliases},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			idx, teardown := scenario.setup(t)
			defer teardown()

			// Baseline: text-only, default scoring, faceted on color (size 10).
			baseReq := createScoreFusionRequest(ScoreDefault, false)
			baseReq.Score = ScoreDefault
			baseReq.Size = 10
			baseReq.AddFacet("color", NewFacetRequest("color", 10))

			// Hybrid request with RRF scoring and an identical facet.
			rrfReq := createScoreFusionRequest(ScoreRRF, true)
			rrfReq.Score = ScoreRRF
			rrfReq.Size = 10
			rrfReq.AddFacet("color", NewFacetRequest("color", 10))

			baseRes, err := idx.Search(baseReq)
			if err != nil {
				t.Fatalf("Default scoring search failed: %v", err)
			}
			rrfRes, err := idx.Search(rrfReq)
			if err != nil {
				t.Fatalf("RRF scoring search failed: %v", err)
			}

			// Both searches must produce hits and facets before comparing.
			if len(baseRes.Hits) == 0 {
				t.Fatal("Expected search results with default scoring, got none")
			}
			if len(rrfRes.Hits) == 0 {
				t.Fatal("Expected search results with RRF scoring, got none")
			}
			if baseRes.Facets == nil {
				t.Fatal("Expected facets with default scoring, got nil")
			}
			if rrfRes.Facets == nil {
				t.Fatal("Expected facets with RRF scoring, got nil")
			}

			baseFacet, baseFound := baseRes.Facets["color"]
			rrfFacet, rrfFound := rrfRes.Facets["color"]
			if !baseFound {
				t.Fatal("Expected color facet in default scoring results")
			}
			if !rrfFound {
				t.Fatal("Expected color facet in RRF scoring results")
			}

			// The test expects the facet to be unaffected by the scoring method.
			if baseFacet.Total != rrfFacet.Total {
				t.Errorf("Facet totals differ: default=%d, RRF=%d",
					baseFacet.Total, rrfFacet.Total)
			}
			if baseFacet.Missing != rrfFacet.Missing {
				t.Errorf("Facet missing counts differ: default=%d, RRF=%d",
					baseFacet.Missing, rrfFacet.Missing)
			}
			if baseFacet.Other != rrfFacet.Other {
				t.Errorf("Facet other counts differ: default=%d, RRF=%d",
					baseFacet.Other, rrfFacet.Other)
			}

			baseTerms := baseFacet.Terms.Terms()
			rrfTerms := rrfFacet.Terms.Terms()
			if len(baseTerms) != len(rrfTerms) {
				t.Errorf("Facet terms count differs: default=%d, RRF=%d",
					len(baseTerms), len(rrfTerms))
				return
			}
			for i, want := range baseTerms {
				got := rrfTerms[i]
				if want.Term != got.Term {
					t.Errorf("Facet term differs at position %d: default=%s, RRF=%s",
						i, want.Term, got.Term)
				}
				if want.Count != got.Count {
					t.Errorf("Facet term count differs for %s: default=%d, RRF=%d",
						want.Term, want.Count, got.Count)
				}
			}
		})
	}
}
// verifyRSFResults sanity-checks hits produced by relative score fusion for
// the request built by createScoreFusionRequest(ScoreRSF, true). Exact scores
// are not pinned; instead it checks that:
//   - there are hits at all (fatal otherwise), and at least 8 of them
//   - "dark blue" is among the first 5 hits
//   - at least 3 of {dark blue, navy, blue, medium blue} are among the first 5
//   - every score lies in [0, 3.0] and the top score is at least 0.1
//   - scores never increase from one hit to the next
//   - the first and fifth scores differ by at least 0.001 (only checked when
//     there are at least 5 hits)
func verifyRSFResults(t *testing.T, hits search.DocumentMatchCollection) {
	if len(hits) == 0 {
		t.Fatal("Expected non-empty search results for RSF")
	}

	if len(hits) < 8 {
		t.Errorf("Expected at least 8 results for RSF, got %d", len(hits))
	}

	// Documents expected near the top: they match the text query and/or sit
	// closest to one of the query vectors.
	topExpectedDocs := []string{"dark blue", "navy", "blue", "medium blue"}

	// doc ID -> 0-based position in hits.
	docMap := make(map[string]int, len(hits))
	for i, hit := range hits {
		docMap[hit.ID] = i
	}

	if pos, found := docMap["dark blue"]; !found {
		t.Error("Expected 'dark blue' to appear in results but not found")
	} else if pos >= 5 {
		t.Errorf("Expected 'dark blue' in top 5 positions, found at position %d", pos+1)
	}

	topFoundCount := 0
	for _, expectedDoc := range topExpectedDocs {
		if pos, found := docMap[expectedDoc]; found && pos < 5 {
			topFoundCount++
		}
	}
	if topFoundCount < 3 {
		t.Errorf("Expected at least 3 of top expected documents in top 5 results, found %d", topFoundCount)
	}

	// The request has three score sources (text query plus two KNN clauses),
	// each with weight 1.0, so the test treats 3.0 as the upper bound.
	for i, hit := range hits {
		if hit.Score < 0 || hit.Score > 3.0 {
			t.Errorf("Hit %d (%s) has unreasonable score: %.6f", i, hit.ID, hit.Score)
		}
		if i == 0 && hit.Score < 0.1 {
			t.Errorf("Top hit (%s) has unexpectedly low score: %.6f", hit.ID, hit.Score)
		}
	}

	// Scores must be non-increasing; equal neighbours are allowed.
	for i := 1; i < len(hits); i++ {
		if hits[i-1].Score < hits[i].Score {
			t.Errorf("Hits not sorted properly: hit %d (%s, score %.6f) < hit %d (%s, score %.6f)",
				i, hits[i-1].ID, hits[i-1].Score, i+1, hits[i].ID, hits[i].Score)
		}
	}

	// The short-result case was already reported above; the guard only keeps
	// hits[4] from indexing out of range.
	if len(hits) >= 5 {
		topScore := hits[0].Score
		fifthScore := hits[4].Score
		if topScore-fifthScore < 0.001 {
			t.Errorf("Insufficient score differentiation: top score %.6f, 5th score %.6f (diff: %.6f)",
				topScore, fifthScore, topScore-fifthScore)
		}
	}
}
// TestRSFEndToEnd runs the hybrid RSF request against a single index, with
// the search context marked for global scoring, and sanity-checks the hits.
func TestRSFEndToEnd(t *testing.T) {
	idx, teardown := setupSingleIndex(t)
	defer teardown()

	req := createScoreFusionRequest(ScoreRSF, true)
	ctx := context.WithValue(context.Background(), search.SearchTypeKey, search.GlobalScoring)

	res, err := idx.SearchInContext(ctx, req)
	if err != nil {
		t.Fatal(err)
	}

	verifyRSFResults(t, res.Hits)
}
// TestRSFAliasWithSingleIndex runs the hybrid RSF request through an alias
// over one index, with the search context marked for global scoring, and
// applies the same sanity checks as the direct-index test.
func TestRSFAliasWithSingleIndex(t *testing.T) {
	target, teardown := setupAliasWithSingleIndex(t)
	defer teardown()

	req := createScoreFusionRequest(ScoreRSF, true)
	ctx := context.WithValue(context.Background(), search.SearchTypeKey, search.GlobalScoring)

	res, err := target.SearchInContext(ctx, req)
	if err != nil {
		t.Fatal(err)
	}

	verifyRSFResults(t, res.Hits)
}
// TestRSFAliasWithTwoIndexes runs the hybrid RSF request through an alias
// whose documents are split over two indexes, with the search context marked
// for global scoring, and applies the same sanity checks as the single-index
// test.
func TestRSFAliasWithTwoIndexes(t *testing.T) {
	target, teardown := setupAliasWithTwoIndexes(t)
	defer teardown()

	req := createScoreFusionRequest(ScoreRSF, true)
	ctx := context.WithValue(context.Background(), search.SearchTypeKey, search.GlobalScoring)

	res, err := target.SearchInContext(ctx, req)
	if err != nil {
		t.Fatal(err)
	}

	verifyRSFResults(t, res.Hits)
}
// TestRSFNestedAliases runs the hybrid RSF request through a master alias
// made of two sub-aliases, with the search context marked for global scoring,
// and applies the same sanity checks as the single-index test.
func TestRSFNestedAliases(t *testing.T) {
	target, teardown := setupNestedAliases(t)
	defer teardown()

	req := createScoreFusionRequest(ScoreRSF, true)
	ctx := context.WithValue(context.Background(), search.SearchTypeKey, search.GlobalScoring)

	res, err := target.SearchInContext(ctx, req)
	if err != nil {
		t.Fatal(err)
	}

	verifyRSFResults(t, res.Hits)
}
// TestRSFPagination fetches the RSF results as two pages of five (From 0 and
// From 5) for every index/alias layout and runs the RSF sanity checks on the
// concatenated pages.
//
// Both page requests are built with ScoreRSF so that the test exercises
// relative score fusion, as its name and its use of verifyRSFResults imply.
func TestRSFPagination(t *testing.T) {
	scenarios := []struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}{
		{name: "SingleIndex", setup: setupSingleIndex},
		{name: "AliasWithSingleIndex", setup: setupAliasWithSingleIndex},
		{name: "AliasWithTwoIndexes", setup: setupAliasWithTwoIndexes},
		{name: "NestedAliases", setup: setupNestedAliases},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			idx, cleanup := scenario.setup(t)
			defer cleanup()

			// First page: hits 0-4.
			firstPageRequest := createScoreFusionRequest(ScoreRSF, true)
			firstPageRequest.From = 0
			firstPageRequest.Size = 5

			firstPageResult, err := idx.Search(firstPageRequest)
			if err != nil {
				t.Fatal(err)
			}

			// Second page: hits 5-9.
			secondPageRequest := createScoreFusionRequest(ScoreRSF, true)
			secondPageRequest.From = 5
			secondPageRequest.Size = 5

			secondPageResult, err := idx.Search(secondPageRequest)
			if err != nil {
				t.Fatal(err)
			}

			combinedHits := make(search.DocumentMatchCollection, 0, len(firstPageResult.Hits)+len(secondPageResult.Hits))
			combinedHits = append(combinedHits, firstPageResult.Hits...)
			combinedHits = append(combinedHits, secondPageResult.Hits...)

			if len(firstPageResult.Hits) == 0 {
				t.Error("Expected results in first page, got none")
			}
			if len(combinedHits) == 0 {
				t.Error("Expected combined results, got none")
			}

			verifyRSFResults(t, combinedHits)
		})
	}
}
// TestRSFFaceting checks, for every index/alias layout, that the "color"
// facet of a hybrid request scored with RSF equals the "color" facet of the
// text-only request with default scoring: same total, missing and other
// counts, and the same terms with the same counts in the same order.
func TestRSFFaceting(t *testing.T) {
	scenarios := []struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}{
		{name: "SingleIndex", setup: setupSingleIndex},
		{name: "AliasWithSingleIndex", setup: setupAliasWithSingleIndex},
		{name: "AliasWithTwoIndexes", setup: setupAliasWithTwoIndexes},
		{name: "NestedAliases", setup: setupNestedAliases},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			idx, teardown := scenario.setup(t)
			defer teardown()

			// Baseline: text-only, default scoring, faceted on color (size 10).
			baseReq := createScoreFusionRequest(ScoreDefault, false)
			baseReq.Score = ScoreDefault
			baseReq.Size = 10
			baseReq.AddFacet("color", NewFacetRequest("color", 10))

			// Hybrid request with RSF scoring and an identical facet.
			rsfReq := createScoreFusionRequest(ScoreRSF, true)
			rsfReq.Size = 10
			rsfReq.AddFacet("color", NewFacetRequest("color", 10))

			baseRes, err := idx.Search(baseReq)
			if err != nil {
				t.Fatalf("Default scoring search failed: %v", err)
			}
			rsfRes, err := idx.Search(rsfReq)
			if err != nil {
				t.Fatalf("RSF scoring search failed: %v", err)
			}

			// Both searches must produce hits and facets before comparing.
			if len(baseRes.Hits) == 0 {
				t.Fatal("Expected search results with default scoring, got none")
			}
			if len(rsfRes.Hits) == 0 {
				t.Fatal("Expected search results with RSF scoring, got none")
			}
			if baseRes.Facets == nil {
				t.Fatal("Expected facets with default scoring, got nil")
			}
			if rsfRes.Facets == nil {
				t.Fatal("Expected facets with RSF scoring, got nil")
			}

			baseFacet, baseFound := baseRes.Facets["color"]
			rsfFacet, rsfFound := rsfRes.Facets["color"]
			if !baseFound {
				t.Fatal("Expected color facet in default scoring results")
			}
			if !rsfFound {
				t.Fatal("Expected color facet in RSF scoring results")
			}

			// The test expects the facet to be unaffected by the scoring method.
			if baseFacet.Total != rsfFacet.Total {
				t.Errorf("Facet totals differ: default=%d, RSF=%d",
					baseFacet.Total, rsfFacet.Total)
			}
			if baseFacet.Missing != rsfFacet.Missing {
				t.Errorf("Facet missing counts differ: default=%d, RSF=%d",
					baseFacet.Missing, rsfFacet.Missing)
			}
			if baseFacet.Other != rsfFacet.Other {
				t.Errorf("Facet other counts differ: default=%d, RSF=%d",
					baseFacet.Other, rsfFacet.Other)
			}

			baseTerms := baseFacet.Terms.Terms()
			rsfTerms := rsfFacet.Terms.Terms()
			if len(baseTerms) != len(rsfTerms) {
				t.Errorf("Facet terms count differs: default=%d, RSF=%d",
					len(baseTerms), len(rsfTerms))
				return
			}
			for i, want := range baseTerms {
				got := rsfTerms[i]
				if want.Term != got.Term {
					t.Errorf("Facet term differs at position %d: default=%s, RSF=%s",
						i, want.Term, got.Term)
				}
				if want.Count != got.Count {
					t.Errorf("Facet term count differs for %s: default=%d, RSF=%d",
						want.Term, want.Count, got.Count)
				}
			}
		})
	}
}
================================================
FILE: rescorer_test.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"testing"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
)
// createFTSIndex creates a new index at path with the mapping used by the
// FTS-only tests in this file.
//
// The index's default mapping is disabled and a non-dynamic document mapping
// is registered under "_default". That mapping defines three text fields
// ("color", "description" and "category"), each using the "en" analyzer with
// DocValues, IncludeInAll, Store and Index all enabled.
//
// It returns whatever New returns for the path and mapping.
func createFTSIndex(path string) (Index, error) {
	mapping := NewIndexMapping()
	mapping.DefaultMapping.Enabled = false
	mapping.DefaultMapping.Dynamic = false

	doc := NewDocumentMapping()
	doc.Enabled = true
	doc.Dynamic = false

	// The three text fields share one configuration, so build them in a loop.
	for _, field := range []string{"color", "description", "category"} {
		fm := NewTextFieldMapping()
		fm.Analyzer = "en"
		fm.DocValues = true
		fm.IncludeInAll = true
		fm.Store = true
		fm.Index = true
		doc.AddFieldMappingsAt(field, fm)
	}

	mapping.AddDocumentMapping("_default", doc)
	return New(path, mapping)
}
// benchmarkResult is a package-level sink: every rescorer sub-benchmark stores
// the hits from its last rescore call here once its loop has finished.
var benchmarkResult search.DocumentMatchCollection
// benchmarkConfig describes one rescorer benchmark scenario.
type benchmarkConfig struct {
// name labels the sub-benchmark and is quoted in its failure message.
name string
// ftsHits is the number of synthetic FTS hits generated; it is also used as
// the request Size.
ftsHits int
// knnHits is the number of synthetic KNN hits generated; it is also passed
// as k when KNN queries are added to the request.
knnHits int
// knnQueries is the number of KNN queries to add to the request when the
// request type exposes AddKNN.
knnQueries int
}
// BenchmarkRescorerRRF runs the shared rescorer benchmark scenarios with the
// ScoreRRF score mode.
func BenchmarkRescorerRRF(b *testing.B) {
runRescorerBenchmarks(b, ScoreRRF)
}
// BenchmarkRescorerRSF runs the shared rescorer benchmark scenarios with the
// ScoreRSF score mode.
func BenchmarkRescorerRSF(b *testing.B) {
runRescorerBenchmarks(b, ScoreRSF)
}
// runRescorerBenchmarks runs one sub-benchmark per built-in scenario (small,
// medium, large) for the given score mode. Sub-benchmarks are named
// "<scoreMode>/<scenario name>".
//
// Only the rescore call is timed: the timer is stopped while the input hit
// lists are cloned from their templates for each iteration. The hits from the
// final iteration are stored in benchmarkResult, and a sub-benchmark fails if
// that final result is empty.
func runRescorerBenchmarks(b *testing.B, scoreMode string) {
	scenarios := [...]benchmarkConfig{
		{name: "small", ftsHits: 256, knnHits: 192, knnQueries: 1},
		{name: "medium", ftsHits: 1024, knnHits: 896, knnQueries: 2},
		{name: "large", ftsHits: 4096, knnHits: 3584, knnQueries: 3},
	}

	for idx := range scenarios {
		scenario := scenarios[idx]
		b.Run(scoreMode+"/"+scenario.name, func(b *testing.B) {
			b.ReportAllocs()
			r, ftsTemplate, knnTemplate := buildBenchmarkInputs(scenario, scoreMode)

			// Keep the timer off except around the rescore call itself.
			b.StopTimer()
			var result search.DocumentMatchCollection
			for remaining := b.N; remaining > 0; remaining-- {
				// Each iteration rescores fresh copies of the template hits.
				fts := cloneDocumentMatches(ftsTemplate)
				knn := cloneDocumentMatches(knnTemplate)

				b.StartTimer()
				// The other two return values are not used by the benchmark.
				result, _, _ = r.rescore(fts, knn)
				b.StopTimer()
			}

			if len(result) == 0 {
				b.Fatalf("rescorer returned no hits for config %q", scenario.name)
			}
			benchmarkResult = result
		})
	}
}
// buildBenchmarkInputs prepares everything one benchmark scenario needs: a
// rescorer built from a synthetic search request, plus the template FTS and
// KNN hit lists produced by buildBenchmarkHits.
//
// The request uses a match query with boost 1.0, Size equal to cfg.ftsHits,
// the given score mode, the default rank constant, and a score window equal
// to the larger of cfg.ftsHits and cfg.knnHits.
//
// KNN queries are added through a type assertion rather than a direct call.
// If *SearchRequest does not expose AddKNN, none are added and the scenario
// runs with zero active KNN queries.
// NOTE(review): presumably AddKNN exists only in some build configurations —
// confirm.
//
// The rescorer's origBoosts is set to 1.0 for the FTS query and for each
// active KNN query.
func buildBenchmarkInputs(cfg benchmarkConfig, scoreMode string) (*rescorer, search.DocumentMatchCollection, search.DocumentMatchCollection) {
	// Score window: the larger of the two hit counts.
	window := cfg.knnHits
	if cfg.ftsHits >= window {
		window = cfg.ftsHits
	}

	textQuery := query.NewMatchQuery("rescorer benchmark payload")
	textQuery.SetBoost(1.0)

	request := &SearchRequest{
		Query: textQuery,
		Size:  cfg.ftsHits,
		From:  0,
		Score: scoreMode,
		Params: &RequestParams{
			ScoreRankConstant: DefaultScoreRankConstant,
			ScoreWindowSize:   window,
		},
	}

	type knnAdder interface {
		AddKNN(field string, vector []float32, k int64, boost float64)
	}

	knnCount := 0
	if adder, ok := interface{}(request).(knnAdder); ok {
		knnCount = cfg.knnQueries
		for n := 0; n < cfg.knnQueries; n++ {
			adder.AddKNN(fmt.Sprintf("vector_%d", n), []float32{1.0, 0.5, 0.25}, int64(cfg.knnHits), 1.0)
		}
	}

	r := newRescorer(request)
	// One boost slot for the FTS query plus one per active KNN query.
	boosts := make([]float64, knnCount+1)
	for slot := range boosts {
		boosts[slot] = 1.0
	}
	r.origBoosts = boosts

	ftsHits, knnHits := buildBenchmarkHits(cfg, knnCount)
	return r, ftsHits, knnHits
}
// buildBenchmarkHits generates the synthetic FTS and KNN hit lists for a
// benchmark scenario.
//
// FTS hits are named "doc-NNNNNN", with scores descending from cfg.ftsHits
// and 1-based hit numbers.
//
// Every fourth KNN hit (and every KNN hit when cfg.ftsHits is 0) gets a
// "knn-only-NNNNNN" ID numbered by i/4. The rest reuse an FTS document ID, so
// the two lists partly overlap. Each KNN hit carries a score breakdown with
// one entry per active KNN query.
//
// NOTE: when cfg.ftsHits is 0, groups of four consecutive KNN hits share the
// same "knn-only" ID because the number is still derived from i/4.
func buildBenchmarkHits(cfg benchmarkConfig, activeKNNQueries int) (search.DocumentMatchCollection, search.DocumentMatchCollection) {
	fts := make(search.DocumentMatchCollection, 0, cfg.ftsHits)
	for rank := 0; rank < cfg.ftsHits; rank++ {
		fts = append(fts, &search.DocumentMatch{
			ID:        fmt.Sprintf("doc-%06d", rank),
			Score:     float64(cfg.ftsHits - rank),
			HitNumber: uint64(rank + 1),
		})
	}

	knn := make(search.DocumentMatchCollection, 0, cfg.knnHits)
	for rank := 0; rank < cfg.knnHits; rank++ {
		var id string
		switch {
		case cfg.ftsHits == 0, rank%4 == 0:
			id = fmt.Sprintf("knn-only-%06d", rank/4)
		default:
			id = fmt.Sprintf("doc-%06d", rank%cfg.ftsHits)
		}

		breakdown := make(map[int]float64, activeKNNQueries)
		for queryIdx := 0; queryIdx < activeKNNQueries; queryIdx++ {
			breakdown[queryIdx] = float64(cfg.knnHits - rank + queryIdx + 1)
		}

		knn = append(knn, &search.DocumentMatch{
			ID:             id,
			Score:          float64(cfg.knnHits - rank),
			ScoreBreakdown: breakdown,
			HitNumber:      uint64(rank + 1),
		})
	}

	return fts, knn
}
// cloneDocumentMatches returns a copy of src in which every non-nil hit is a
// new DocumentMatch value. Nil entries stay nil in the result.
//
// ScoreBreakdown, Sort, DecodedSort and IndexNames get their own backing
// storage in the copy. All other fields are copied by plain struct
// assignment, so any reference-typed values among them remain shared with
// the source hit.
func cloneDocumentMatches(src search.DocumentMatchCollection) search.DocumentMatchCollection {
	out := make(search.DocumentMatchCollection, len(src))
	for idx := range src {
		orig := src[idx]
		if orig == nil {
			continue
		}

		dup := new(search.DocumentMatch)
		*dup = *orig

		if orig.ScoreBreakdown != nil {
			breakdown := make(map[int]float64, len(orig.ScoreBreakdown))
			for queryIdx, score := range orig.ScoreBreakdown {
				breakdown[queryIdx] = score
			}
			dup.ScoreBreakdown = breakdown
		}
		if len(orig.Sort) != 0 {
			dup.Sort = append([]string(nil), orig.Sort...)
		}
		if len(orig.DecodedSort) != 0 {
			dup.DecodedSort = append([]string(nil), orig.DecodedSort...)
		}
		if len(orig.IndexNames) != 0 {
			dup.IndexNames = append([]string(nil), orig.IndexNames...)
		}

		out[idx] = dup
	}
	return out
}
func getFTSDocuments() []map[string]interface{} {
documents := []map[string]interface{}{
{
"color": "dark slate blue",
"description": "deep and rich color with dark undertones",
"category": "blue shades",
},
{
"color": "blue",
"description": "primary color that is bright and vibrant",
"category": "primary colors",
},
{
"color": "navy",
"description": "dark blue color often used in uniforms",
"category": "dark colors",
},
{
"color": "steel blue",
"description": "metallic blue with gray undertones",
"category": "metallic shades",
},
{
"color": "light blue",
"description": "pale and soft blue color with light appearance",
"category": "light colors",
},
{
"color": "deep sky blue",
"description": "bright blue reminiscent of clear skies",
"category": "sky colors",
},
{
"color": "royal blue",
"description": "rich and regal blue color fit for royalty",
"category": "rich colors",
},
{
"color": "powder blue",
"description": "very light blue with powder-like softness",
"category": "light colors",
},
{
"color": "corn flower blue",
"description": "medium blue color named after the flower",
"category": "floral colors",
},
{
"color": "alice blue",
"description": "very pale blue with light and airy quality",
"category": "light colors",
},
{
"color": "blue violet",
"description": "purple-blue color with violet undertones",
"category": "purple shades",
},
{
"color": "sky blue",
"description": "bright blue color of a clear day sky",
"category": "sky colors",
},
{
"color": "indigo",
"description": "deep purple-blue color with dark intensity",
"category": "dark colors",
},
{
"color": "midnight blue",
"description": "very dark blue like the night sky",
"category": "dark colors",
},
{
"color": "dark blue",
"description": "deep blue color with dark characteristics",
"category": "dark colors",
},
{
"color": "medium slate blue",
"description": "medium intensity blue with slate properties",
"category": "blue shades",
},
{
"color": "cadet blue",
"description": "grayish blue color often used in uniforms",
"category": "metallic shades",
},
{
"color": "light steel blue",
"description": "light metallic blue with steel-like appearance",
"category": "light colors",
},
{
"color": "dodger blue",
"description": "bright medium blue with vibrant intensity",
"category": "bright colors",
},
{
"color": "medium blue",
"description": "standard blue with medium intensity and saturation",
"category": "blue shades",
},
{
"color": "slate blue",
"description": "blue-gray color with slate-like properties",
"category": "blue shades",
},
{
"color": "light sky blue",
"description": "light version of sky blue with airy quality",
"category": "light colors",
},
}
return documents
}
// createFTSSearchRequest builds the search request shared by the FTS fusion
// tests.
//
// The request's query is a disjunction of three match-phrase queries:
//   - "dark" in the color field
//   - "light" in the description field
//   - "blue" in the category field
//
// A disjunction is used because a SearchRequest carries a single FTS query.
// The request is configured with Size 10, a score rank constant of 1, a score
// window size of 10, Explain disabled, and scoreMethod as its Score.
func createFTSSearchRequest(scoreMethod string) *SearchRequest {
	darkInColor := query.NewMatchPhraseQuery("dark")
	darkInColor.SetField("color")

	lightInDescription := query.NewMatchPhraseQuery("light")
	lightInDescription.SetField("description")

	blueInCategory := query.NewMatchPhraseQuery("blue")
	blueInCategory.SetField("category")

	// Build the request directly around the combined query instead of
	// creating it for the first sub-query and overwriting Query afterwards.
	combined := query.NewDisjunctionQuery([]query.Query{
		darkInColor,
		lightInDescription,
		blueInCategory,
	})
	searchRequest := NewSearchRequest(combined)

	// Keyed fields keep this correct if RequestParams gains or reorders
	// fields.
	searchRequest.AddParams(RequestParams{
		ScoreRankConstant: 1,
		ScoreWindowSize:   10,
	})
	searchRequest.Size = 10
	searchRequest.Score = scoreMethod
	searchRequest.Explain = false
	return searchRequest
}
// verifyFTSRRFResults performs sanity checks on the hits returned for the
// shared FTS request under RRF scoring. Exact scores are not checked. It
// verifies that:
//   - there is at least one hit (fatal otherwise),
//   - hits are ordered by non-increasing score, and
//   - at least three hits have an ID from the expected set below.
//
// Expected matches per sub-query, judging by the fixture text:
//
//	"dark" in color:        dark slate blue, dark blue
//	"light" in description: light blue, powder blue, alice blue,
//	                        light steel blue, light sky blue
//	"blue" in category:     dark slate blue, medium slate blue,
//	                        medium blue, slate blue
func verifyFTSRRFResults(t *testing.T, hits search.DocumentMatchCollection) {
	expected := map[string]struct{}{
		"dark slate blue":   {}, // color and category sub-queries
		"light blue":        {}, // description sub-query
		"dark blue":         {}, // color sub-query
		"light steel blue":  {}, // description sub-query
		"medium slate blue": {}, // category sub-query
	}

	if len(hits) == 0 {
		t.Fatal("Expected search results, got none")
	}

	// Each hit must score at least as high as the one after it.
	for next := 1; next < len(hits); next++ {
		prev := next - 1
		if hits[prev].Score < hits[next].Score {
			t.Errorf("Results not properly ranked by score: position %d (%.6f) < position %d (%.6f)",
				prev, hits[prev].Score, next, hits[next].Score)
		}
	}

	// Count the hits whose ID is one of the expected documents.
	matched := 0
	for _, hit := range hits {
		if _, ok := expected[hit.ID]; ok {
			matched++
		}
	}
	if matched >= 3 {
		return
	}

	t.Errorf("Expected to find at least 3 of the top expected documents, found %d", matched)
	t.Logf("Actual results:")
	for pos, hit := range hits {
		t.Logf(" %d: %s (score: %.6f)", pos+1, hit.ID, hit.Score)
	}
}
// setupFTSSingleIndex creates one index in a fresh temporary path, loads all
// FTS fixture documents into it (each keyed by its "color" value) and returns
// the index together with a cleanup function.
//
// Any failure during setup aborts the test with t.Fatal.
//
// The cleanup function closes the index and removes the temporary path. A
// Close failure is reported with t.Error rather than t.Fatal so that the
// temporary path is still removed afterwards.
func setupFTSSingleIndex(t *testing.T) (Index, func()) {
	tmpIndexPath := createTmpIndexPath(t)
	index, err := createFTSIndex(tmpIndexPath)
	if err != nil {
		t.Fatal(err)
	}

	batch := index.NewBatch()
	for _, doc := range getFTSDocuments() {
		if err := batch.Index(doc["color"].(string), doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := index.Batch(batch); err != nil {
		t.Fatal(err)
	}

	cleanup := func() {
		if err := index.Close(); err != nil {
			// Keep going: the temporary path must be removed regardless.
			t.Error(err)
		}
		cleanupTmpIndexPath(t, tmpIndexPath)
	}
	return index, cleanup
}
// setupFTSAliasWithSingleIndex creates one index in a fresh temporary path,
// wraps it in an index alias, loads all FTS fixture documents through the
// alias (each keyed by its "color" value) and returns the alias together with
// a cleanup function.
//
// Any failure during setup aborts the test with t.Fatal.
//
// The cleanup function closes the underlying index and removes the temporary
// path. A Close failure is reported with t.Error rather than t.Fatal so that
// the temporary path is still removed afterwards.
func setupFTSAliasWithSingleIndex(t *testing.T) (Index, func()) {
	tmpIndexPath := createTmpIndexPath(t)
	index, err := createFTSIndex(tmpIndexPath)
	if err != nil {
		t.Fatal(err)
	}
	documents := getFTSDocuments()

	alias := NewIndexAlias()
	alias.Add(index)

	// Batches are created and applied through the alias, not the index.
	batch := alias.NewBatch()
	for _, doc := range documents {
		if err := batch.Index(doc["color"].(string), doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := alias.Batch(batch); err != nil {
		t.Fatal(err)
	}

	cleanup := func() {
		if err := index.Close(); err != nil {
			// Keep going: the temporary path must be removed regardless.
			t.Error(err)
		}
		cleanupTmpIndexPath(t, tmpIndexPath)
	}
	return alias, cleanup
}
// setupFTSAliasWithTwoIndexes splits the FTS fixture documents at the
// midpoint, loads each half into its own index (each in a fresh temporary
// path, documents keyed by their "color" value) and returns an alias over
// both indexes together with a cleanup function.
//
// Any failure during setup aborts the test with t.Fatal.
//
// The cleanup function closes both indexes and then removes both temporary
// paths. Close failures are reported with t.Error rather than t.Fatal so
// that one failing Close does not skip the remaining cleanup steps.
func setupFTSAliasWithTwoIndexes(t *testing.T) (Index, func()) {
	documents := getFTSDocuments()
	midpoint := len(documents) / 2

	var (
		indexes []Index
		paths   []string
	)

	// newLoadedIndex creates an index in a fresh temporary path, loads docs
	// into it and records both the index and its path for cleanup.
	newLoadedIndex := func(docs []map[string]interface{}) Index {
		path := createTmpIndexPath(t)
		index, err := createFTSIndex(path)
		if err != nil {
			t.Fatal(err)
		}
		batch := index.NewBatch()
		for _, doc := range docs {
			if err := batch.Index(doc["color"].(string), doc); err != nil {
				t.Fatal(err)
			}
		}
		if err := index.Batch(batch); err != nil {
			t.Fatal(err)
		}
		indexes = append(indexes, index)
		paths = append(paths, path)
		return index
	}

	index1 := newLoadedIndex(documents[:midpoint])
	index2 := newLoadedIndex(documents[midpoint:])

	alias := NewIndexAlias()
	alias.Add(index1, index2)

	cleanup := func() {
		for _, index := range indexes {
			if err := index.Close(); err != nil {
				// Keep going so the other index and the paths are cleaned up.
				t.Error(err)
			}
		}
		for _, path := range paths {
			cleanupTmpIndexPath(t, path)
		}
	}
	return alias, cleanup
}
// setupFTSNestedAliases splits the FTS fixture documents into three
// consecutive groups and loads each group into its own index (each in a fresh
// temporary path, documents keyed by their "color" value). It then builds a
// two-level alias tree:
//
//	masterAlias
//	├── subAlias1 -> index1
//	└── subAlias2 -> index2, index3
//
// It returns masterAlias together with a cleanup function. Any failure during
// setup aborts the test with t.Fatal.
//
// The cleanup function closes the three indexes and then removes the three
// temporary paths. Close failures are reported with t.Error rather than
// t.Fatal so that one failing Close does not skip the remaining cleanup
// steps.
func setupFTSNestedAliases(t *testing.T) (Index, func()) {
	documents := getFTSDocuments()
	firstCut := len(documents) / 3
	secondCut := 2 * len(documents) / 3

	var (
		indexes []Index
		paths   []string
	)

	// newLoadedIndex creates an index in a fresh temporary path, loads docs
	// into it and records both the index and its path for cleanup.
	newLoadedIndex := func(docs []map[string]interface{}) Index {
		path := createTmpIndexPath(t)
		index, err := createFTSIndex(path)
		if err != nil {
			t.Fatal(err)
		}
		batch := index.NewBatch()
		for _, doc := range docs {
			if err := batch.Index(doc["color"].(string), doc); err != nil {
				t.Fatal(err)
			}
		}
		if err := index.Batch(batch); err != nil {
			t.Fatal(err)
		}
		indexes = append(indexes, index)
		paths = append(paths, path)
		return index
	}

	index1 := newLoadedIndex(documents[:firstCut])
	index2 := newLoadedIndex(documents[firstCut:secondCut])
	index3 := newLoadedIndex(documents[secondCut:])

	// First sub-alias wraps a single index.
	subAlias1 := NewIndexAlias()
	subAlias1.SetName("subAlias1")
	subAlias1.Add(index1)

	// Second sub-alias wraps the other two indexes.
	subAlias2 := NewIndexAlias()
	subAlias2.SetName("subAlias2")
	subAlias2.Add(index2, index3)

	// The master alias contains only the two sub-aliases.
	masterAlias := NewIndexAlias()
	masterAlias.SetName("masterAlias")
	masterAlias.Add(subAlias1, subAlias2)

	cleanup := func() {
		for _, index := range indexes {
			if err := index.Close(); err != nil {
				// Keep going so the other indexes and the paths are cleaned up.
				t.Error(err)
			}
		}
		for _, path := range paths {
			cleanupTmpIndexPath(t, path)
		}
	}
	return masterAlias, cleanup
}
// TestFTSRRFEndToEnd runs the shared FTS request with RRF scoring against a
// single index and checks the hits with verifyFTSRRFResults.
func TestFTSRRFEndToEnd(t *testing.T) {
	idx, teardown := setupFTSSingleIndex(t)
	defer teardown()

	res, err := idx.Search(createFTSSearchRequest(ScoreRRF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRRFResults(t, res.Hits)
}
// TestFTSRRFAliasWithSingleIndex runs the shared FTS request with RRF scoring
// through an alias that wraps one index and checks the hits with
// verifyFTSRRFResults.
func TestFTSRRFAliasWithSingleIndex(t *testing.T) {
	target, teardown := setupFTSAliasWithSingleIndex(t)
	defer teardown()

	res, err := target.Search(createFTSSearchRequest(ScoreRRF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRRFResults(t, res.Hits)
}
// TestFTSRRFAliasWithTwoIndexes runs the shared FTS request with RRF scoring
// through an alias over two indexes, each holding half of the fixture
// documents, and checks the hits with verifyFTSRRFResults.
func TestFTSRRFAliasWithTwoIndexes(t *testing.T) {
	target, teardown := setupFTSAliasWithTwoIndexes(t)
	defer teardown()

	res, err := target.Search(createFTSSearchRequest(ScoreRRF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRRFResults(t, res.Hits)
}
// TestFTSRRFNestedAliases runs the shared FTS request with RRF scoring through
// a master alias that contains two sub-aliases and checks the hits with
// verifyFTSRRFResults.
func TestFTSRRFNestedAliases(t *testing.T) {
	target, teardown := setupFTSNestedAliases(t)
	defer teardown()

	res, err := target.Search(createFTSSearchRequest(ScoreRRF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRRFResults(t, res.Hits)
}
// TestFTSRRFPagination requests the first two pages (five hits each) of the
// shared FTS request under RRF scoring, for every index/alias layout. It
// checks that the first page is non-empty and holds at most five hits, then
// passes the concatenation of both pages to verifyFTSRRFResults.
func TestFTSRRFPagination(t *testing.T) {
	type layout struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}
	layouts := []layout{
		{"SingleIndex", setupFTSSingleIndex},
		{"AliasWithSingleIndex", setupFTSAliasWithSingleIndex},
		{"AliasWithTwoIndexes", setupFTSAliasWithTwoIndexes},
		{"NestedAliases", setupFTSNestedAliases},
	}

	const pageSize = 5

	for _, l := range layouts {
		t.Run(l.name, func(t *testing.T) {
			target, teardown := l.setup(t)
			defer teardown()

			// fetchPage runs the RRF request for the page starting at offset
			// from and returns its hits.
			fetchPage := func(from int) search.DocumentMatchCollection {
				req := createFTSSearchRequest(ScoreRRF)
				req.From = from
				req.Size = pageSize
				res, err := target.Search(req)
				if err != nil {
					t.Fatal(err)
				}
				return res.Hits
			}

			firstPage := fetchPage(0)
			secondPage := fetchPage(pageSize)

			// Concatenate the pages in order: first page, then second page.
			combined := make(search.DocumentMatchCollection, 0, len(firstPage)+len(secondPage))
			combined = append(combined, firstPage...)
			combined = append(combined, secondPage...)

			// The number of matches depends on the corpus, so only bounds on
			// the first page are asserted.
			if len(firstPage) == 0 {
				t.Fatal("Expected at least some results in first page, got 0")
			}
			if len(firstPage) > 5 {
				t.Errorf("Expected at most 5 results in first page, got %d", len(firstPage))
			}

			if len(combined) == 0 {
				t.Fatal("Expected at least some combined results, got 0")
			}

			verifyFTSRRFResults(t, combined)
		})
	}
}
// TestFTSRRFFaceting checks, for every index/alias layout, that a "color"
// terms facet (size 10) gives the same totals, missing/other counts and
// term list whether the shared FTS request is scored with ScoreDefault or
// ScoreRRF.
func TestFTSRRFFaceting(t *testing.T) {
	type layout struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}
	layouts := []layout{
		{"SingleIndex", setupFTSSingleIndex},
		{"AliasWithSingleIndex", setupFTSAliasWithSingleIndex},
		{"AliasWithTwoIndexes", setupFTSAliasWithTwoIndexes},
		{"NestedAliases", setupFTSNestedAliases},
	}

	for _, l := range layouts {
		t.Run(l.name, func(t *testing.T) {
			target, teardown := l.setup(t)
			defer teardown()

			// Baseline request: default scoring plus the color facet.
			defaultRequest := createFTSSearchRequest(ScoreDefault)
			defaultRequest.Size = 10
			defaultRequest.AddFacet("color", NewFacetRequest("color", 10))

			// Same request and facet, scored with RRF.
			rrfRequest := createFTSSearchRequest(ScoreRRF)
			rrfRequest.Size = 10
			rrfRequest.AddFacet("color", NewFacetRequest("color", 10))

			defaultResult, err := target.Search(defaultRequest)
			if err != nil {
				t.Fatalf("Default scoring search failed: %v", err)
			}
			rrfResult, err := target.Search(rrfRequest)
			if err != nil {
				t.Fatalf("RRF scoring search failed: %v", err)
			}

			// Both searches must return hits.
			if len(defaultResult.Hits) == 0 {
				t.Fatal("Expected search results with default scoring, got none")
			}
			if len(rrfResult.Hits) == 0 {
				t.Fatal("Expected search results with RRF scoring, got none")
			}

			// Both searches must return facets.
			if defaultResult.Facets == nil {
				t.Fatal("Expected facets with default scoring, got nil")
			}
			if rrfResult.Facets == nil {
				t.Fatal("Expected facets with RRF scoring, got nil")
			}

			baseFacet, baseOK := defaultResult.Facets["color"]
			rrfFacet, rrfOK := rrfResult.Facets["color"]
			if !baseOK {
				t.Fatal("Expected color facet in default scoring results")
			}
			if !rrfOK {
				t.Fatal("Expected color facet in RRF scoring results")
			}

			// The scoring method is expected to leave the facet unchanged.
			if baseFacet.Total != rrfFacet.Total {
				t.Errorf("Facet totals differ: default=%d, RRF=%d",
					baseFacet.Total, rrfFacet.Total)
			}
			if baseFacet.Missing != rrfFacet.Missing {
				t.Errorf("Facet missing counts differ: default=%d, RRF=%d",
					baseFacet.Missing, rrfFacet.Missing)
			}
			if baseFacet.Other != rrfFacet.Other {
				t.Errorf("Facet other counts differ: default=%d, RRF=%d",
					baseFacet.Other, rrfFacet.Other)
			}

			baseTerms := baseFacet.Terms.Terms()
			rrfTerms := rrfFacet.Terms.Terms()
			if len(baseTerms) != len(rrfTerms) {
				t.Errorf("Facet terms count differs: default=%d, RRF=%d",
					len(baseTerms), len(rrfTerms))
				return
			}

			// Terms are compared position by position.
			for pos := range baseTerms {
				want, got := baseTerms[pos], rrfTerms[pos]
				if want.Term != got.Term {
					t.Errorf("Facet term differs at position %d: default=%s, RRF=%s",
						pos, want.Term, got.Term)
				}
				if want.Count != got.Count {
					t.Errorf("Facet term count differs for %s: default=%d, RRF=%d",
						want.Term, want.Count, got.Count)
				}
			}
		})
	}
}
// verifyFTSRSFResults performs sanity checks on the hits returned for the
// shared FTS request under RSF scoring. Exact scores are not checked. It
// verifies that:
//   - there is at least one hit (fatal otherwise) and at least five hits,
//   - "dark slate blue" is present and within the first three positions,
//   - at least three of the expected top documents are in the first five
//     positions,
//   - every score lies in [0, 3.0] and the first hit scores at least 0.1,
//   - hits are ordered by non-increasing score, and
//   - the first and fifth scores differ by at least 0.001.
//
// NOTE(review): the 3.0 upper bound is described in the original comments as
// the sum of weights with default weights — confirm against the rescorer.
func verifyFTSRSFResults(t *testing.T, hits search.DocumentMatchCollection) {
	if len(hits) == 0 {
		t.Fatal("Expected non-empty search results for FTS RSF")
	}
	if len(hits) < 5 {
		t.Errorf("Expected at least 5 results for FTS RSF, got %d", len(hits))
	}

	// Documents expected near the top: "dark slate blue" is the only one
	// targeted by two sub-queries (color and category), the others by one.
	topExpectedDocs := []string{"dark slate blue", "light blue", "dark blue", "medium slate blue", "light sky blue"}

	// Zero-based position of every hit, keyed by document ID.
	position := make(map[string]int)
	for idx, hit := range hits {
		position[hit.ID] = idx
	}

	pos, found := position["dark slate blue"]
	switch {
	case !found:
		t.Error("Expected 'dark slate blue' to appear in results but not found")
	case pos >= 3:
		t.Errorf("Expected 'dark slate blue' in top 3 positions, found at position %d", pos+1)
	}

	inTopFive := 0
	for _, id := range topExpectedDocs {
		if p, ok := position[id]; ok && p < 5 {
			inTopFive++
		}
	}
	if inTopFive < 3 {
		t.Errorf("Expected at least 3 of top expected documents in top 5 results, found %d", inTopFive)
	}

	// Range check on every score, plus a floor on the top hit's score.
	for idx, hit := range hits {
		if hit.Score < 0 || hit.Score > 3.0 {
			t.Errorf("Hit %d (%s) has unreasonable score: %.6f", idx, hit.ID, hit.Score)
		}
		if idx == 0 && hit.Score < 0.1 {
			t.Errorf("Top hit (%s) has unexpectedly low score: %.6f", hit.ID, hit.Score)
		}
	}

	// Scores must be non-increasing; equal neighbouring scores are accepted.
	for cur := 1; cur < len(hits); cur++ {
		before, after := hits[cur-1], hits[cur]
		if before.Score < after.Score {
			t.Errorf("Hits not sorted properly: hit %d (%s, score %.6f) < hit %d (%s, score %.6f)",
				cur, before.ID, before.Score, cur+1, after.ID, after.Score)
		}
	}

	// The first and fifth hits must be separated by a visible score gap.
	if len(hits) >= 5 {
		topScore, fifthScore := hits[0].Score, hits[4].Score
		if gap := topScore - fifthScore; gap < 0.001 {
			t.Errorf("Insufficient score differentiation: top score %.6f, 5th score %.6f (diff: %.6f)",
				topScore, fifthScore, gap)
		}
	}
}
// TestFTSRSFEndToEnd runs the shared FTS request with RSF scoring against a
// single index and checks the hits with verifyFTSRSFResults.
func TestFTSRSFEndToEnd(t *testing.T) {
	idx, teardown := setupFTSSingleIndex(t)
	defer teardown()

	res, err := idx.Search(createFTSSearchRequest(ScoreRSF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRSFResults(t, res.Hits)
}
// TestFTSRSFAliasWithSingleIndex runs the shared FTS request with RSF scoring
// through an alias that wraps one index and checks the hits with
// verifyFTSRSFResults.
func TestFTSRSFAliasWithSingleIndex(t *testing.T) {
	target, teardown := setupFTSAliasWithSingleIndex(t)
	defer teardown()

	res, err := target.Search(createFTSSearchRequest(ScoreRSF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRSFResults(t, res.Hits)
}
// TestFTSRSFAliasWithTwoIndexes runs the shared FTS request with RSF scoring
// through an alias over two indexes, each holding half of the fixture
// documents, and checks the hits with verifyFTSRSFResults.
func TestFTSRSFAliasWithTwoIndexes(t *testing.T) {
	target, teardown := setupFTSAliasWithTwoIndexes(t)
	defer teardown()

	res, err := target.Search(createFTSSearchRequest(ScoreRSF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRSFResults(t, res.Hits)
}
// TestFTSRSFNestedAliases runs the shared FTS request with RSF scoring through
// a master alias that contains two sub-aliases and checks the hits with
// verifyFTSRSFResults.
func TestFTSRSFNestedAliases(t *testing.T) {
	target, teardown := setupFTSNestedAliases(t)
	defer teardown()

	res, err := target.Search(createFTSSearchRequest(ScoreRSF))
	if err != nil {
		t.Fatal(err)
	}

	verifyFTSRSFResults(t, res.Hits)
}
// TestFTSRSFPagination requests the first two pages (five hits each) of the
// shared FTS request under RSF scoring, for every index/alias layout. It
// checks that the first page is non-empty and holds at most five hits, then
// passes the concatenation of both pages to verifyFTSRSFResults.
func TestFTSRSFPagination(t *testing.T) {
	type layout struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}
	layouts := []layout{
		{"SingleIndex", setupFTSSingleIndex},
		{"AliasWithSingleIndex", setupFTSAliasWithSingleIndex},
		{"AliasWithTwoIndexes", setupFTSAliasWithTwoIndexes},
		{"NestedAliases", setupFTSNestedAliases},
	}

	const pageSize = 5

	for _, l := range layouts {
		t.Run(l.name, func(t *testing.T) {
			target, teardown := l.setup(t)
			defer teardown()

			// fetchPage runs the RSF request for the page starting at offset
			// from and returns its hits.
			fetchPage := func(from int) search.DocumentMatchCollection {
				req := createFTSSearchRequest(ScoreRSF)
				req.From = from
				req.Size = pageSize
				res, err := target.Search(req)
				if err != nil {
					t.Fatal(err)
				}
				return res.Hits
			}

			firstPage := fetchPage(0)
			secondPage := fetchPage(pageSize)

			// Concatenate the pages in order: first page, then second page.
			combined := make(search.DocumentMatchCollection, 0, len(firstPage)+len(secondPage))
			combined = append(combined, firstPage...)
			combined = append(combined, secondPage...)

			// The number of matches depends on the corpus, so only bounds on
			// the first page are asserted.
			if len(firstPage) == 0 {
				t.Fatal("Expected at least some results in first page, got 0")
			}
			if len(firstPage) > 5 {
				t.Errorf("Expected at most 5 results in first page, got %d", len(firstPage))
			}

			if len(combined) == 0 {
				t.Fatal("Expected at least some combined results, got 0")
			}

			verifyFTSRSFResults(t, combined)
		})
	}
}
// TestFTSRSFFaceting checks, for every index/alias configuration, that the
// "category" facet produced by an RSF-scored search is identical to the one
// produced by the same search with default scoring.
func TestFTSRSFFaceting(t *testing.T) {
	const facetName = "category"

	scenarios := []struct {
		name  string
		setup func(t *testing.T) (Index, func())
	}{
		{name: "SingleIndex", setup: setupFTSSingleIndex},
		{name: "AliasWithSingleIndex", setup: setupFTSAliasWithSingleIndex},
		{name: "AliasWithTwoIndexes", setup: setupFTSAliasWithTwoIndexes},
		{name: "NestedAliases", setup: setupFTSNestedAliases},
	}

	for _, scenario := range scenarios {
		t.Run(scenario.name, func(t *testing.T) {
			index, cleanup := scenario.setup(t)
			defer cleanup()

			// Baseline request: default scoring, ten hits, category facet of size 10.
			baseReq := createFTSSearchRequest(ScoreRRF)
			baseReq.Score = ScoreDefault // Use default scoring
			baseReq.Size = 10
			baseReq.AddFacet(facetName, NewFacetRequest(facetName, 10))

			// RSF request with an identical facet definition.
			rsfReq := createFTSSearchRequest(ScoreRSF)
			rsfReq.Size = 10
			rsfReq.AddFacet(facetName, NewFacetRequest(facetName, 10))

			baseRes, err := index.Search(baseReq)
			if err != nil {
				t.Fatalf("Default scoring search failed: %v", err)
			}
			rsfRes, err := index.Search(rsfReq)
			if err != nil {
				t.Fatalf("RSF scoring search failed: %v", err)
			}

			// Both searches must have produced hits ...
			if len(baseRes.Hits) == 0 {
				t.Fatal("Expected search results with default scoring, got none")
			}
			if len(rsfRes.Hits) == 0 {
				t.Fatal("Expected search results with RSF scoring, got none")
			}
			// ... and facets.
			if baseRes.Facets == nil {
				t.Fatal("Expected facets with default scoring, got nil")
			}
			if rsfRes.Facets == nil {
				t.Fatal("Expected facets with RSF scoring, got nil")
			}

			baseFacet, ok := baseRes.Facets[facetName]
			if !ok {
				t.Fatal("Expected category facet in default scoring results")
			}
			rsfFacet, ok := rsfRes.Facets[facetName]
			if !ok {
				t.Fatal("Expected category facet in RSF scoring results")
			}

			// Facets are expected to be independent of the scoring method,
			// so every aggregate must match.
			if baseFacet.Total != rsfFacet.Total {
				t.Errorf("Facet totals differ: default=%d, RSF=%d",
					baseFacet.Total, rsfFacet.Total)
			}
			if baseFacet.Missing != rsfFacet.Missing {
				t.Errorf("Facet missing counts differ: default=%d, RSF=%d",
					baseFacet.Missing, rsfFacet.Missing)
			}
			if baseFacet.Other != rsfFacet.Other {
				t.Errorf("Facet other counts differ: default=%d, RSF=%d",
					baseFacet.Other, rsfFacet.Other)
			}

			// Compare the facet terms position by position.
			baseTerms := baseFacet.Terms.Terms()
			rsfTerms := rsfFacet.Terms.Terms()
			if len(baseTerms) != len(rsfTerms) {
				t.Errorf("Facet terms count differs: default=%d, RSF=%d",
					len(baseTerms), len(rsfTerms))
				return
			}
			for i := range baseTerms {
				want, got := baseTerms[i], rsfTerms[i]
				if want.Term != got.Term {
					t.Errorf("Facet term differs at position %d: default=%s, RSF=%s",
						i, want.Term, got.Term)
				}
				if want.Count != got.Count {
					t.Errorf("Facet term count differs for %s: default=%d, RSF=%d",
						want.Term, want.Count, got.Count)
				}
			}
		})
	}
}
================================================
FILE: search/collector/bench_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"context"
"math/rand"
"strconv"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// createCollector is a factory returning a search.Collector; benchHelper
// invokes it once per benchmark iteration.
type createCollector func() search.Collector
// benchHelper benchmarks a collector: it prepares numOfMatches document
// matches with random scores, then for each of the b.N iterations runs a
// collector obtained from cc over a stub searcher that replays those matches.
func benchHelper(numOfMatches int, cc createCollector, b *testing.B) {
	matches := make([]*search.DocumentMatch, numOfMatches)
	for i := range matches {
		matches[i] = &search.DocumentMatch{
			IndexInternalID: index.IndexInternalID(strconv.Itoa(i)),
			Score:           rand.Float64(),
		}
	}

	// Fixture construction above is excluded from the measurement.
	b.ResetTimer()

	for run := 0; run < b.N; run++ {
		searcher := &stubSearcher{matches: matches}
		if err := cc().Collect(context.Background(), searcher, &stubReader{}); err != nil {
			b.Fatal(err)
		}
	}
}
================================================
FILE: search/collector/eligible.go
================================================
// Copyright (c) 2024 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package collector
import (
"context"
"fmt"
"time"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// EligibleCollector is a collector that does not keep any hits; every
// document match it receives is added to an eligible document selector.
type EligibleCollector struct {
// size is used to pre-size the DocumentMatchPool in Collect
size int
// total is the number of document matches passed to the match handler
total uint64
// took is the duration of the last Collect call that completed without error
took time.Duration
// eligibleSelector receives the internal IDs of the matched documents;
// it is created when the document match handler is built
eligibleSelector index.EligibleDocumentSelector
}
// NewEligibleCollector returns an EligibleCollector configured with the
// given size.
func NewEligibleCollector(size int) *EligibleCollector {
return newEligibleCollector(size)
}
// newEligibleCollector builds the collector. There is no sort order and the
// skip is always 0, since this collector only filters eligible docs.
func newEligibleCollector(size int) *EligibleCollector {
	return &EligibleCollector{size: size}
}
// makeEligibleDocumentMatchHandler returns a document match handler that adds
// each match to a freshly created eligible document selector, which is stored
// on the EligibleCollector found in ctx. It returns an error if ctx.Collector
// is not an *EligibleCollector or if reader is not an index.VectorIndexReader.
func makeEligibleDocumentMatchHandler(ctx *search.SearchContext, reader index.IndexReader) (search.DocumentMatchHandler, error) {
	ec, ok := ctx.Collector.(*EligibleCollector)
	if !ok {
		return nil, fmt.Errorf("eligiblity collector not available")
	}
	vr, ok := reader.(index.VectorIndexReader)
	if !ok {
		return nil, fmt.Errorf("reader is not a VectorIndexReader")
	}

	// create a new eligible document selector to add eligible document matches
	ec.eligibleSelector = vr.NewEligibleDocumentSelector()

	handler := func(d *search.DocumentMatch) error {
		// a nil match is the end-of-stream call; there is nothing to flush
		if d == nil {
			return nil
		}
		if err := ec.eligibleSelector.AddEligibleDocumentMatch(d.IndexInternalID); err != nil {
			return err
		}
		// the match has been recorded, so recycle the DocumentMatch
		ctx.DocumentMatchPool.Put(d)
		return nil
	}
	return handler, nil
}
// Collect iterates over the searcher, passing every document match to the
// eligible document match handler, and records the time taken. It returns
// ctx.Err() if ctx is cancelled, or the first error returned by the handler
// construction, the searcher or the handler.
func (ec *EligibleCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
startTime := time.Now()
var err error
var next *search.DocumentMatch
// size the DocumentMatchPool from the collector size, bounded by
// PreAllocSizeSkipCap + 1
backingSize := ec.size
if backingSize > PreAllocSizeSkipCap {
backingSize = PreAllocSizeSkipCap + 1
}
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), 0),
Collector: ec,
IndexReader: reader,
}
dmHandler, err := makeEligibleDocumentMatchHandler(searchContext, reader)
if err != nil {
return err
}
// check for cancellation before asking the searcher for the first match
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
next, err = searcher.Next(searchContext)
}
for err == nil && next != nil {
// poll for cancellation only once every CheckDoneEvery matches
if ec.total%CheckDoneEvery == 0 {
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
}
}
ec.total++
err = dmHandler(next)
if err != nil {
break
}
next, err = searcher.Next(searchContext)
}
if err != nil {
return err
}
// help finalize/flush the results in case
// of custom document match handlers.
err = dmHandler(nil)
if err != nil {
return err
}
// compute search duration
ec.took = time.Since(startTime)
return nil
}
// Results always returns nil. The eligible collector does not return any
// document matches; this method exists only to conform to the
// search.Collector interface.
func (ec *EligibleCollector) Results() search.DocumentMatchCollection {
return nil
}
// EligibleSelector exposes the eligible document selector populated by this
// collector, from which the list of eligible documents can be retrieved.
// It returns nil when the collector has not seen any document match.
func (ec *EligibleCollector) EligibleSelector() index.EligibleDocumentSelector {
	if ec.total > 0 {
		return ec.eligibleSelector
	}
	return nil
}
// Total returns the number of document matches seen by the collector.
func (ec *EligibleCollector) Total() uint64 {
return ec.total
}
// MaxScore always returns 0; there is no concept of scoring in the
// eligible collector.
func (ec *EligibleCollector) MaxScore() float64 {
return 0
}
// Took returns the duration recorded by the last Collect call that
// completed without error.
func (ec *EligibleCollector) Took() time.Duration {
return ec.took
}
// SetFacetsBuilder is a no-op; the given facets builder is ignored.
func (ec *EligibleCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
// facet unsupported for pre-filtering in KNN search
}
// FacetResults always returns nil.
func (ec *EligibleCollector) FacetResults() search.FacetResults {
// facet unsupported for pre-filtering in KNN search
return nil
}
================================================
FILE: search/collector/heap.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"container/heap"
"github.com/blevesearch/bleve/v2/search"
)
// collectStoreHeap keeps document matches in a binary heap managed through
// the container/heap package.
type collectStoreHeap struct {
// heap is the backing slice holding the heap-ordered matches
heap search.DocumentMatchCollection
// compare orders two matches; Less is derived from it
compare collectorCompare
}
// newStoreHeap returns an empty heap-backed store whose backing slice is
// pre-allocated with the given capacity and ordered using compare.
func newStoreHeap(capacity int, compare collectorCompare) *collectStoreHeap {
	store := &collectStoreHeap{compare: compare}
	store.heap = make(search.DocumentMatchCollection, 0, capacity)
	heap.Init(store)
	return store
}
// AddNotExceedingSize pushes doc onto the heap. If the heap then holds more
// than size entries, the entry at the top of the heap is popped and returned;
// otherwise nil is returned.
func (c *collectStoreHeap) AddNotExceedingSize(doc *search.DocumentMatch,
	size int) *search.DocumentMatch {
	c.add(doc)
	if c.Len() <= size {
		return nil
	}
	return c.removeLast()
}
// add pushes doc onto the heap.
func (c *collectStoreHeap) add(doc *search.DocumentMatch) {
heap.Push(c, doc)
}
// removeLast pops and returns the entry at the top of the heap.
func (c *collectStoreHeap) removeLast() *search.DocumentMatch {
return heap.Pop(c).(*search.DocumentMatch)
}
// Final pops all but skip entries off the heap and returns them in reverse
// pop order (the first popped entry ends up last), applying fixup to each
// one. If the heap holds skip or fewer entries an empty collection is
// returned. The first error returned by fixup aborts the operation.
func (c *collectStoreHeap) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	remaining := c.Len() - skip
	if remaining <= 0 {
		return make(search.DocumentMatchCollection, 0), nil
	}

	out := make(search.DocumentMatchCollection, remaining)
	// fill the result from the back towards the front
	for pos := remaining; pos > 0; pos-- {
		doc := heap.Pop(c).(*search.DocumentMatch)
		out[pos-1] = doc
		if err := fixup(doc); err != nil {
			return nil, err
		}
	}
	return out, nil
}
// Internal returns the heap's backing slice itself (not a copy), in heap order.
func (c *collectStoreHeap) Internal() search.DocumentMatchCollection {
return c.heap
}
// heap interface implementation

// Len returns the number of entries currently in the heap.
func (c *collectStoreHeap) Len() int {
return len(c.heap)
}
// Less reports whether the entry at index i sorts before the entry at index j
// in the heap. The comparator's result is negated, so Less is true when
// compare(heap[i], heap[j]) is positive.
func (c *collectStoreHeap) Less(i, j int) bool {
so := c.compare(c.heap[i], c.heap[j])
return -so < 0
}
// Swap exchanges the entries at indexes i and j.
func (c *collectStoreHeap) Swap(i, j int) {
c.heap[i], c.heap[j] = c.heap[j], c.heap[i]
}
// Push appends x, which must be a *search.DocumentMatch, to the backing
// slice. It is meant to be called through heap.Push.
func (c *collectStoreHeap) Push(x interface{}) {
c.heap = append(c.heap, x.(*search.DocumentMatch))
}
// Pop removes and returns the last element of the backing slice. It is meant
// to be called through heap.Pop.
func (c *collectStoreHeap) Pop() interface{} {
var rv *search.DocumentMatch
// take the last element and shrink the slice by one in a single assignment
rv, c.heap = c.heap[len(c.heap)-1], c.heap[:len(c.heap)-1]
return rv
}
================================================
FILE: search/collector/knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package collector
import (
"context"
"time"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// collectStoreKNN maintains one collector store per KNN query and tracks the
// document matches held across them.
type collectStoreKNN struct {
// internalHeaps holds one store per KNN query, indexed by query position
internalHeaps []collectorStore
// kValues[i] is the maximum number of entries kept in internalHeaps[i]
kValues []int64
// allHits is the de-duplicated set of matches gathered by Final
allHits map[*search.DocumentMatch]struct{}
// ejectedDocs is scratch space used within a single AddDocument call
ejectedDocs map[*search.DocumentMatch]struct{}
}
// newStoreKNN wraps the given per-query stores and their K values in a
// collectStoreKNN with empty bookkeeping sets.
func newStoreKNN(internalHeaps []collectorStore, kValues []int64) *collectStoreKNN {
	store := &collectStoreKNN{
		internalHeaps: internalHeaps,
		kValues:       kValues,
	}
	store.ejectedDocs = make(map[*search.DocumentMatch]struct{})
	store.allHits = make(map[*search.DocumentMatch]struct{})
	return store
}
// AddDocument offers doc to every internal store for which doc carries a
// score breakdown entry, and returns the documents that no longer belong to
// any store: a document ejected from a store loses that store's score
// breakdown entry, and once its breakdown is empty it is not in the top K
// of any store. The returned documents are put back into the document match
// pool by the KNN collector.
func (c *collectStoreKNN) AddDocument(doc *search.DocumentMatch) []*search.DocumentMatch {
	for heapIdx, store := range c.internalHeaps {
		if _, scored := doc.ScoreBreakdown[heapIdx]; !scored {
			continue
		}
		evicted := store.AddNotExceedingSize(doc, int(c.kValues[heapIdx]))
		if evicted == nil {
			continue
		}
		delete(evicted.ScoreBreakdown, heapIdx)
		c.ejectedDocs[evicted] = struct{}{}
	}

	var released []*search.DocumentMatch
	for candidate := range c.ejectedDocs {
		if len(candidate.ScoreBreakdown) == 0 {
			released = append(released, candidate)
		}
		// empty the scratch map so it can be reused by the next AddDocument call
		delete(c.ejectedDocs, candidate)
	}
	return released
}
// Final gathers the contents of all internal stores into a single collection
// without duplicates and, when fixup is non-nil, applies it to every entry.
// The order of the returned collection is unspecified. The first error
// returned by fixup aborts the operation.
func (c *collectStoreKNN) Final(fixup collectorFixup) (search.DocumentMatchCollection, error) {
	// The same document match may be in the top K of several KNN queries;
	// the set collapses those duplicates.
	for _, store := range c.internalHeaps {
		for _, hit := range store.Internal() {
			c.allHits[hit] = struct{}{}
		}
	}

	if len(c.allHits) == 0 {
		return make(search.DocumentMatchCollection, 0), nil
	}

	merged := make(search.DocumentMatchCollection, 0, len(c.allHits))
	for hit := range c.allHits {
		if fixup != nil {
			if err := fixup(hit); err != nil {
				return nil, err
			}
		}
		merged = append(merged, hit)
	}
	return merged, nil
}
// MakeKNNDocMatchHandler returns a document match handler that adds each
// match to the KNN store of the KNNCollector found in ctx and returns any
// ejected matches to the document match pool.
//
// NOTE(review): when ctx.Collector is not a *KNNCollector this returns a nil
// handler together with a nil error, so callers must check the handler
// before invoking it.
func MakeKNNDocMatchHandler(ctx *search.SearchContext) (search.DocumentMatchHandler, error) {
	hc, isKNN := ctx.Collector.(*KNNCollector)
	if !isKNN {
		return nil, nil
	}

	return func(d *search.DocumentMatch) error {
		// a nil match is the end-of-stream call; there is nothing to flush
		if d == nil {
			return nil
		}
		for _, released := range hc.knnStore.AddDocument(d) {
			ctx.DocumentMatchPool.Put(released)
		}
		return nil
	}, nil
}
// GetNewKNNCollectorStore builds a collectStoreKNN with one internal store
// per entry of kArray. The store at position i has size kArray[i] and orders
// document matches by their score breakdown entry for index i.
func GetNewKNNCollectorStore(kArray []int64) *collectStoreKNN {
	stores := make([]collectorStore, len(kArray))
	for knnIdx, k := range kArray {
		// TODO - Check if the datatype of k can be made into an int instead of int64
		idx := knnIdx // per-iteration copy captured by the comparator below
		byBreakdownScore := func(i, j *search.DocumentMatch) int {
			if i.ScoreBreakdown[idx] < j.ScoreBreakdown[idx] {
				return 1
			}
			return -1
		}
		stores[idx] = getOptimalCollectorStore(int(k), 0, byBreakdownScore)
	}
	return newStoreKNN(stores, kArray)
}
// KNNCollector implements the Collector interface, keeping matches in a
// collectStoreKNN.
type KNNCollector struct {
// knnStore holds the per-query stores the matches are added to
knnStore *collectStoreKNN
// size is used to pre-size the DocumentMatchPool in Collect
size int
// total is the number of document matches passed to the match handler
total uint64
// took is the duration measured by Collect, excluding result finalization
took time.Duration
// results is the collection produced by finalizeResults
results search.DocumentMatchCollection
// maxScore is returned by MaxScore
maxScore float64
}
// NewKNNCollector returns a KNNCollector with one internal store per entry
// of kArray and the given size.
func NewKNNCollector(kArray []int64, size int64) *KNNCollector {
	collector := &KNNCollector{size: int(size)}
	collector.knnStore = GetNewKNNCollectorStore(kArray)
	return collector
}
// Collect iterates over the searcher, passing every document match to the
// KNN document match handler, then finalizes the results. It returns
// ctx.Err() if ctx is cancelled, or the first error returned by the handler
// construction, the searcher, the handler or the finalization.
func (hc *KNNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
startTime := time.Now()
var err error
var next *search.DocumentMatch
// pre-allocate enough space in the DocumentMatchPool
// unless the sum of K is too large, then cap it
// everything should still work, just allocates DocumentMatches on demand
backingSize := hc.size
if backingSize > PreAllocSizeSkipCap {
backingSize = PreAllocSizeSkipCap + 1
}
searchContext := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), 0),
Collector: hc,
IndexReader: reader,
}
// a handler maker supplied through the context overrides the default one
dmHandlerMakerKNN := MakeKNNDocMatchHandler
if cv := ctx.Value(search.MakeKNNDocumentMatchHandlerKey); cv != nil {
dmHandlerMakerKNN = cv.(search.MakeKNNDocumentMatchHandler)
}
// use the application given builder for making the custom document match
// handler and perform callbacks/invocations on the newly made handler.
dmHandler, err := dmHandlerMakerKNN(searchContext)
if err != nil {
return err
}
// check for cancellation before asking the searcher for the first match
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
next, err = searcher.Next(searchContext)
}
for err == nil && next != nil {
// poll for cancellation only once every CheckDoneEvery matches
if hc.total%CheckDoneEvery == 0 {
select {
case <-ctx.Done():
search.RecordSearchCost(ctx, search.AbortM, 0)
return ctx.Err()
default:
}
}
hc.total++
err = dmHandler(next)
if err != nil {
break
}
next, err = searcher.Next(searchContext)
}
if err != nil {
return err
}
// help finalize/flush the results in case
// of custom document match handlers.
err = dmHandler(nil)
if err != nil {
return err
}
// compute search duration
hc.took = time.Since(startTime)
// finalize actual results
err = hc.finalizeResults(reader)
if err != nil {
return err
}
return nil
}
// finalizeResults stores the final contents of the KNN store in hc.results,
// filling in the external ID of every document match that does not have one
// yet by looking it up through r.
func (hc *KNNCollector) finalizeResults(r index.IndexReader) error {
	resolveID := func(doc *search.DocumentMatch) error {
		if doc.ID != "" {
			return nil
		}
		// look up the id since we need it for lookup
		externalID, err := r.ExternalID(doc.IndexInternalID)
		doc.ID = externalID
		return err
	}

	results, err := hc.knnStore.Final(resolveID)
	hc.results = results
	return err
}
// Results returns the collection produced by the last result finalization.
func (hc *KNNCollector) Results() search.DocumentMatchCollection {
return hc.results
}
// Total returns the number of document matches seen by the collector.
func (hc *KNNCollector) Total() uint64 {
return hc.total
}
// MaxScore returns the collector's maxScore field.
func (hc *KNNCollector) MaxScore() float64 {
return hc.maxScore
}
// Took returns the search duration recorded by Collect.
func (hc *KNNCollector) Took() time.Duration {
return hc.took
}
// SetFacetsBuilder is a no-op; the given facets builder is ignored.
func (hc *KNNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
// facet unsupported for vector search
}
// FacetResults always returns nil.
func (hc *KNNCollector) FacetResults() search.FacetResults {
// facet unsupported for vector search
return nil
}
================================================
FILE: search/collector/list.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"container/list"
"github.com/blevesearch/bleve/v2/search"
)
// collectStoreList keeps document matches in a doubly linked list that is
// kept ordered on insertion.
type collectStoreList struct {
// results holds the matches as *search.DocumentMatch values
results *list.List
// compare determines the insertion position of a new match
compare collectorCompare
}
// newStoreList returns an empty list-backed store ordered using compare.
// The capacity argument is not used by this implementation.
func newStoreList(capacity int, compare collectorCompare) *collectStoreList {
	return &collectStoreList{
		results: list.New(),
		compare: compare,
	}
}
// AddNotExceedingSize inserts doc into the list. If the list then holds more
// than size entries, the entry at the front of the list is removed and
// returned; otherwise nil is returned.
func (c *collectStoreList) AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch {
	c.add(doc)
	if c.len() <= size {
		return nil
	}
	return c.removeLast()
}
// add inserts doc immediately before the first element, scanning from the
// front, for which compare(doc, element) is not negative. If there is no
// such element, doc is appended at the back.
func (c *collectStoreList) add(doc *search.DocumentMatch) {
	for e := c.results.Front(); e != nil; e = e.Next() {
		if c.compare(doc, e.Value.(*search.DocumentMatch)) < 0 {
			continue
		}
		c.results.InsertBefore(doc, e)
		return
	}
	// reached the end without finding a position, so doc goes last
	c.results.PushBack(doc)
}
// removeLast removes and returns the element at the front of the list.
func (c *collectStoreList) removeLast() *search.DocumentMatch {
return c.results.Remove(c.results.Front()).(*search.DocumentMatch)
}
// Final walks the list from back to front, skips the first skip entries and
// returns the remaining ones in that order, applying fixup to each. If the
// list holds skip or fewer entries an empty collection is returned. The
// first error returned by fixup aborts the operation.
func (c *collectStoreList) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	remaining := c.results.Len() - skip
	if remaining <= 0 {
		return search.DocumentMatchCollection{}, nil
	}

	// step over the entries to be skipped
	e := c.results.Back()
	for n := 0; n < skip && e != nil; n++ {
		e = e.Prev()
	}

	rv := make(search.DocumentMatchCollection, remaining)
	for i := 0; e != nil; e = e.Prev() {
		doc := e.Value.(*search.DocumentMatch)
		rv[i] = doc
		if err := fixup(doc); err != nil {
			return nil, err
		}
		i++
	}
	return rv, nil
}
// Internal returns a newly allocated collection holding the list's entries
// in front-to-back order.
func (c *collectStoreList) Internal() search.DocumentMatchCollection {
	rv := make(search.DocumentMatchCollection, 0, c.results.Len())
	for e := c.results.Front(); e != nil; e = e.Next() {
		rv = append(rv, e.Value.(*search.DocumentMatch))
	}
	return rv
}
// len returns the number of entries in the list.
func (c *collectStoreList) len() int {
return c.results.Len()
}
================================================
FILE: search/collector/nested.go
================================================
// Copyright (c) 2026 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// collectStoreNested merges incoming document matches into the match of
// their root document, building one root document match at a time.
type collectStoreNested struct {
// descAdder is used to customize how descendants are merged into their parent
descAdder search.DescendantAdderCallbackFn
// nested reader to retrieve ancestor information
nr index.NestedReader
// the current root document match being built
currRoot *search.DocumentMatch
// the ancestor ID of the current root document being built
currRootAncestorID index.AncestorID
// prealloc slice for ancestor IDs
ancestors []index.AncestorID
}
// newStoreNested returns a nested store that reads ancestor information
// from nr and merges descendants into their root using descAdder.
func newStoreNested(nr index.NestedReader, descAdder search.DescendantAdderCallbackFn) *collectStoreNested {
	return &collectStoreNested{
		nr:        nr,
		descAdder: descAdder,
	}
}
// ProcessNestedDocument adds a document to the nested store, merging it into its root document
// as needed. If the returned DocumentMatch is nil, the incoming doc has been merged
// into its parent and should not be processed further. If the returned DocumentMatch
// is non-nil, it represents a complete root document that should be processed further.
// The most recently started root stays inside the store until a document with a
// different root arrives; it can be read through Current.
// NOTE: This implementation assumes that documents are added in increasing order of their internal IDs
// which is guaranteed by all searchers in bleve.
func (c *collectStoreNested) ProcessNestedDocument(ctx *search.SearchContext, doc *search.DocumentMatch) (*search.DocumentMatch, error) {
// find ancestors for the doc, reusing the preallocated slice
var err error
c.ancestors, err = c.nr.Ancestors(doc.IndexInternalID, c.ancestors[:0])
if err != nil {
return nil, err
}
if len(c.ancestors) == 0 {
// should not happen, every doc should have at least itself as ancestor
// (in this case doc is neither returned nor put back into the pool)
return nil, nil
}
// root docID is the last ancestor
rootID := c.ancestors[len(c.ancestors)-1]
// check if there is an interim root already and if the incoming doc belongs to it
if c.currRoot != nil && c.currRootAncestorID.Equals(rootID) {
// there is an interim root already, and the incoming doc belongs to it
if err := c.descAdder(c.currRoot, doc); err != nil {
return nil, err
}
// recycle the child document now that it's merged into the interim root
ctx.DocumentMatchPool.Put(doc)
return nil, nil
}
// completedRoot is the root document match to return, if any
var completedRoot *search.DocumentMatch
if c.currRoot != nil {
// we have an existing interim root, return it for processing
completedRoot = c.currRoot
}
// no interim root for now so either we have a root document incoming
// or we have a child doc and need to create an interim root
if len(c.ancestors) == 1 {
// incoming doc is the root itself
c.currRoot = doc
c.currRootAncestorID = rootID
return completedRoot, nil
}
// this is a child doc, create interim root
newDM := ctx.DocumentMatchPool.Get()
newDM.IndexInternalID = rootID.ToIndexInternalID(newDM.IndexInternalID)
// merge the incoming doc into the new interim root
c.currRoot = newDM
c.currRootAncestorID = rootID
if err := c.descAdder(c.currRoot, doc); err != nil {
return nil, err
}
// recycle the child document now that it's merged into the interim root
ctx.DocumentMatchPool.Put(doc)
return completedRoot, nil
}
// Current returns the current interim root document match being built, or
// nil if no root has been started yet.
func (c *collectStoreNested) Current() *search.DocumentMatch {
return c.currRoot
}
================================================
FILE: search/collector/search_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"context"
"reflect"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// stubSearcher is a test searcher that replays a fixed list of document
// matches in order.
type stubSearcher struct {
	// index is the position of the next match to replay
	index int
	// matches are the matches to replay
	matches []*search.DocumentMatch
}

// SetBytesRead is a no-op.
func (ss *stubSearcher) SetBytesRead(val uint64) {
}

// BytesRead always returns 0.
func (ss *stubSearcher) BytesRead() uint64 {
	return 0
}

// Size returns the size of the stubSearcher struct plus the sizes reported
// by all of its non-nil matches.
func (ss *stubSearcher) Size() int {
	total := int(reflect.TypeOf(*ss).Size())
	for _, m := range ss.matches {
		if m == nil {
			continue
		}
		total += m.Size()
	}
	return total
}

// Next returns a pooled DocumentMatch carrying the internal ID and score of
// the next stored match, or nil once all matches have been replayed.
func (ss *stubSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	if ss.index >= len(ss.matches) {
		return nil, nil
	}
	src := ss.matches[ss.index]
	rv := ctx.DocumentMatchPool.Get()
	rv.IndexInternalID = src.IndexInternalID
	rv.Score = src.Score
	ss.index++
	return rv, nil
}

// Advance skips every stored match whose internal ID compares lower than ID
// and then returns the next match, exactly as Next would.
func (ss *stubSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	for ss.index < len(ss.matches) {
		if ss.matches[ss.index].IndexInternalID.Compare(ID) >= 0 {
			break
		}
		ss.index++
	}
	return ss.Next(ctx)
}

// Close is a no-op that always succeeds.
func (ss *stubSearcher) Close() error {
	return nil
}

// Weight always returns 0.
func (ss *stubSearcher) Weight() float64 {
	return 0.0
}

// SetQueryNorm is a no-op.
func (ss *stubSearcher) SetQueryNorm(float64) {
}

// Count returns the number of stored matches.
func (ss *stubSearcher) Count() uint64 {
	return uint64(len(ss.matches))
}

// Min always returns 0.
func (ss *stubSearcher) Min() int {
	return 0
}

// DocumentMatchPoolSize always returns 0.
func (ss *stubSearcher) DocumentMatchPoolSize() int {
	return 0
}
// stubReader is a test index reader whose methods return zero values,
// except for ExternalID and InternalID, which convert between the two ID
// representations byte for byte, and DocValueReader, which returns a
// DocValueReader backed by this stub.
type stubReader struct{}
// Size always returns 0.
func (sr *stubReader) Size() int {
return 0
}
// TermFieldReader always returns nil, nil.
func (sr *stubReader) TermFieldReader(ctx context.Context, term []byte, field string, includeFreq, includeNorm, includeTermVectors bool) (index.TermFieldReader, error) {
return nil, nil
}
// DocIDReaderAll always returns nil, nil.
func (sr *stubReader) DocIDReaderAll() (index.DocIDReader, error) {
return nil, nil
}
// DocIDReaderOnly always returns nil, nil.
func (sr *stubReader) DocIDReaderOnly(ids []string) (index.DocIDReader, error) {
return nil, nil
}
// FieldDict always returns nil, nil.
func (sr *stubReader) FieldDict(field string) (index.FieldDict, error) {
return nil, nil
}
// FieldDictRange always returns nil, nil.
func (sr *stubReader) FieldDictRange(field string, startTerm []byte, endTerm []byte) (index.FieldDict, error) {
return nil, nil
}
// FieldDictPrefix always returns nil, nil.
func (sr *stubReader) FieldDictPrefix(field string, termPrefix []byte) (index.FieldDict, error) {
return nil, nil
}
// Document always returns nil, nil.
func (sr *stubReader) Document(id string) (index.Document, error) {
return nil, nil
}
// DocumentVisitFieldTerms never calls visitor and always returns nil.
func (sr *stubReader) DocumentVisitFieldTerms(id index.IndexInternalID, fields []string, visitor index.DocValueVisitor) error {
return nil
}
// Fields always returns nil, nil.
func (sr *stubReader) Fields() ([]string, error) {
return nil, nil
}
// GetInternal always returns nil, nil.
func (sr *stubReader) GetInternal(key []byte) ([]byte, error) {
return nil, nil
}
// DocCount always returns 0, nil.
func (sr *stubReader) DocCount() (uint64, error) {
return 0, nil
}
// ExternalID returns the bytes of the internal ID as a string.
func (sr *stubReader) ExternalID(id index.IndexInternalID) (string, error) {
return string(id), nil
}
// InternalID returns the bytes of the external ID string.
func (sr *stubReader) InternalID(id string) (index.IndexInternalID, error) {
return []byte(id), nil
}
// DumpAll always returns a nil channel.
func (sr *stubReader) DumpAll() chan interface{} {
return nil
}
// DumpDoc always returns a nil channel.
func (sr *stubReader) DumpDoc(id string) chan interface{} {
return nil
}
// DumpFields always returns a nil channel.
func (sr *stubReader) DumpFields() chan interface{} {
return nil
}
// Close is a no-op that always succeeds.
func (sr *stubReader) Close() error {
return nil
}
// DocValueReader returns a DocValueReader bound to this stub and the given fields.
func (sr *stubReader) DocValueReader(fields []string) (index.DocValueReader, error) {
return &DocValueReader{i: sr, fields: fields}, nil
}
// DocValueReader is a test doc value reader that forwards to a stubReader.
type DocValueReader struct {
// i is the stub reader the visits are forwarded to
i *stubReader
// fields are the field names passed along on every visit
fields []string
}
// VisitDocValues forwards to the stub reader's DocumentVisitFieldTerms with
// the fields this reader was created with.
func (dvr *DocValueReader) VisitDocValues(id index.IndexInternalID, visitor index.DocValueVisitor) error {
return dvr.i.DocumentVisitFieldTerms(id, dvr.fields, visitor)
}
// BytesRead always returns 0.
func (dvr *DocValueReader) BytesRead() uint64 {
return 0
}
================================================
FILE: search/collector/slice.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"github.com/blevesearch/bleve/v2/search"
)
// collectStoreSlice keeps document matches in a slice that is kept ordered
// on insertion.
type collectStoreSlice struct {
// slice holds the matches in order
slice search.DocumentMatchCollection
// compare determines the insertion position of a new match
compare collectorCompare
}
// newStoreSlice returns an empty slice-backed store whose slice is
// pre-allocated with the given capacity and ordered using compare.
func newStoreSlice(capacity int, compare collectorCompare) *collectStoreSlice {
	store := &collectStoreSlice{compare: compare}
	store.slice = make(search.DocumentMatchCollection, 0, capacity)
	return store
}
// AddNotExceedingSize inserts doc in sorted position. If the store then holds
// more than size hits, the last hit is removed and returned; otherwise nil is
// returned.
func (c *collectStoreSlice) AddNotExceedingSize(doc *search.DocumentMatch,
	size int) *search.DocumentMatch {
	c.add(doc)
	if c.len() <= size {
		return nil
	}
	return c.removeLast()
}
// add inserts doc into the slice while keeping it sorted. The search for the
// insertion point walks backwards from the end and stops at the first stored
// hit that doc does not compare below, so doc lands after any hits it
// compares equal to.
func (c *collectStoreSlice) add(doc *search.DocumentMatch) {
	pos := len(c.slice)
	for pos > 0 && c.compare(doc, c.slice[pos-1]) < 0 {
		pos--
	}

	// grow by one, shift the tail right, and drop doc into the gap
	c.slice = append(c.slice, nil)
	copy(c.slice[pos+1:], c.slice[pos:])
	c.slice[pos] = doc
}
// removeLast detaches and returns the final hit of the slice. It must only
// be called on a non-empty store.
func (c *collectStoreSlice) removeLast() *search.DocumentMatch {
	last := len(c.slice) - 1
	removed := c.slice[last]
	c.slice = c.slice[:last]
	return removed
}
// Final returns the stored hits after dropping the first skip of them,
// applying fixup to every returned hit. The first fixup error aborts the
// call and is returned with a nil collection. When skip exceeds the number
// of stored hits an empty collection is returned.
func (c *collectStoreSlice) Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error) {
	if skip > len(c.slice) {
		return search.DocumentMatchCollection{}, nil
	}
	kept := c.slice[skip:]
	for _, doc := range kept {
		if err := fixup(doc); err != nil {
			return nil, err
		}
	}
	return kept, nil
}
// Internal exposes the underlying slice of stored hits without copying it.
func (c *collectStoreSlice) Internal() search.DocumentMatchCollection {
	return c.slice
}
// len reports how many hits are currently stored.
func (c *collectStoreSlice) len() int {
	return len(c.slice)
}
================================================
FILE: search/collector/topn.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"context"
"reflect"
"strconv"
"time"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTopNCollector caches the static size of a TopNCollector
// value as reported by reflection; it is computed once at package init and
// used by TopNCollector.Size.
var reflectStaticSizeTopNCollector int

func init() {
	reflectStaticSizeTopNCollector = int(reflect.TypeOf(TopNCollector{}).Size())
}
// collectorStore abstracts the container in which the collector keeps its
// candidate hits while collecting.
type collectorStore interface {
	// Add the document, and if the new store size exceeds the provided size
	// the last element is removed and returned. If the size has not been
	// exceeded, nil is returned.
	AddNotExceedingSize(doc *search.DocumentMatch, size int) *search.DocumentMatch

	// Final returns the stored hits after dropping the first skip of them,
	// invoking fixup on the hits it returns.
	Final(skip int, fixup collectorFixup) (search.DocumentMatchCollection, error)

	// Internal provides access to the store's internal collection of hits.
	Internal() search.DocumentMatchCollection
}
// PreAllocSizeSkipCap caps preallocation (of the collector store and of the
// DocumentMatchPool) at this amount when size+skip exceeds this value.
// Collection still works beyond the cap; storage is simply grown on demand.
var PreAllocSizeSkipCap = 1000
// collectorCompare orders two hits; the collector wires it to
// search.SortOrder.Compare. A negative result places i ahead of j in the
// slice-backed store.
type collectorCompare func(i, j *search.DocumentMatch) int

// collectorFixup is applied to each hit handed back by a store's Final
// method; a non-nil error aborts finalization.
type collectorFixup func(d *search.DocumentMatch) error
// TopNCollector collects the top N hits, optionally skipping some results
type TopNCollector struct {
	// size is the number of hits requested.
	size int
	// skip is the number of leading hits dropped when results are finalized.
	skip int
	// total counts the documents that went through prepareDocumentMatch.
	total uint64
	// bytesRead accumulates dvReader.BytesRead() after each doc values visit.
	bytesRead uint64
	// maxScore is the highest score seen so far.
	maxScore float64
	// took is the duration measured by Collect.
	took time.Duration
	// sort is the order used to compare hits and compute their sort values.
	sort search.SortOrder
	// results holds the final hits; it is populated by finalizeResults.
	results search.DocumentMatchCollection
	// facetsBuilder is optional and is set through SetFacetsBuilder.
	facetsBuilder *search.FacetsBuilder
	// store keeps the best size+skip hits seen so far.
	store collectorStore

	// needDocIds is true when external IDs must be loaded before sorting.
	needDocIds bool
	// neededFields lists the fields whose doc values must be visited
	// (required by the sort order and, if set, the facets builder).
	neededFields []string
	// cachedScoring and cachedDesc are cached from the sort order
	// (CacheIsScore / CacheDescending) and passed to every Compare call.
	cachedScoring []bool
	cachedDesc    []bool

	// lowestMatchOutsideResults tracks the best-sorting hit that has already
	// been evicted from the store; hits that do not sort ahead of it are
	// rejected without touching the store.
	lowestMatchOutsideResults *search.DocumentMatch
	// updateFieldVisitor feeds visited doc values to the facets builder
	// (if any) and to the sort order; it is installed by Collect.
	updateFieldVisitor index.DocValueVisitor
	// dvReader is obtained from the index reader in Collect for neededFields.
	dvReader index.DocValueReader
	// searchAfter, when non-nil, is the pagination marker: hits that do not
	// sort strictly after it are dropped.
	searchAfter *search.DocumentMatch

	// knnHits maps hit ID to kNN hit; entries are removed once merged into a
	// matching document.
	knnHits map[string]*search.DocumentMatch
	// hybridMergeCallback merges a kNN hit into the matching document.
	hybridMergeCallback search.HybridMergeCallbackFn

	// nestedStore is non-nil only for collectors built with a NestedReader.
	nestedStore *collectStoreNested
}
// CheckDoneEvery controls how frequently we check the context deadline:
// Collect polls ctx.Done() once every CheckDoneEvery documents seen.
const CheckDoneEvery = uint64(1024)
// NewTopNCollector builds a collector to find the top 'size' hits
// skipping over the first 'skip' hits
// ordering hits by the provided sort order.
// The collector is created without nested document handling.
func NewTopNCollector(size int, skip int, sort search.SortOrder) *TopNCollector {
	return newTopNCollector(size, skip, sort, nil)
}
// NewTopNCollectorAfter builds a collector to find the top 'size' hits
// ordering hits by the provided sort order
// starting after the provided 'after' sort values.
// No hits are skipped: pagination is driven purely by 'after'.
func NewTopNCollectorAfter(size int, sort search.SortOrder, after []string) *TopNCollector {
	collector := newTopNCollector(size, 0, sort, nil)
	collector.searchAfter = createSearchAfterDocument(sort, after)
	return collector
}
// NewNestedTopNCollector builds a collector to find the top 'size' hits
// skipping over the first 'skip' hits
// ordering hits by the provided sort order
// while ensuring the nested documents are handled correctly
// (i.e. parent document is returned instead of nested document).
// The nested reader 'nr' is passed through to the shared constructor.
func NewNestedTopNCollector(size int, skip int, sort search.SortOrder, nr index.NestedReader) *TopNCollector {
	return newTopNCollector(size, skip, sort, nr)
}
// NewNestedTopNCollectorAfter builds a collector to find the top 'size' hits
// ordering hits by the provided sort order
// starting after the provided 'after' sort values
// while ensuring the nested documents are handled correctly
// (i.e. parent document is returned instead of nested document).
// No hits are skipped: pagination is driven purely by 'after'.
func NewNestedTopNCollectorAfter(size int, sort search.SortOrder, after []string, nr index.NestedReader) *TopNCollector {
	collector := newTopNCollector(size, 0, sort, nr)
	collector.searchAfter = createSearchAfterDocument(sort, after)
	return collector
}
// newTopNCollector is the shared constructor behind the exported New*
// functions. It picks a store for size+skip hits, optionally wires up nested
// document handling when nr is non-nil, and caches the sort order properties
// that are consulted for every hit.
func newTopNCollector(size int, skip int, sort search.SortOrder, nr index.NestedReader) *TopNCollector {
	hc := &TopNCollector{
		size: size,
		skip: skip,
		sort: sort,
	}

	// The comparator reads the cached slices through hc at call time, so it
	// observes the values assigned further down.
	hc.store = getOptimalCollectorStore(size, skip, func(a, b *search.DocumentMatch) int {
		return hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, a, b)
	})

	if nr != nil {
		// mergeChild folds a descendant match into its parent match.
		mergeChild := func(parent, child *search.DocumentMatch) error {
			// score, explanation and term locations are always merged
			parent.Score += child.Score
			parent.Expl = parent.Expl.MergeWith(child.Expl)
			parent.FieldTermLocations = search.MergeFieldTermLocationsFromMatch(parent.FieldTermLocations, child)

			// a document is never recorded as its own descendant
			if parent.IndexInternalID.Equals(child.IndexInternalID) {
				return nil
			}

			// Store a copy of the child's ID, because child.IndexInternalID
			// will be reset when 'child' is recycled. If the descendants
			// slice has spare capacity, reuse the buffer sitting in the next
			// slot as the copy destination.
			var scratch index.IndexInternalID
			if n := len(parent.Descendants); n < cap(parent.Descendants) {
				scratch = parent.Descendants[:n+1][n]
			}
			parent.Descendants = append(parent.Descendants,
				index.NewIndexInternalIDFrom(scratch, child.IndexInternalID))
			return nil
		}
		hc.nestedStore = newStoreNested(nr, search.DescendantAdderCallbackFn(mergeChild))
	}

	// these lookups traverse an interface, so do once up-front
	hc.needDocIds = sort.RequiresDocID()
	hc.neededFields = sort.RequiredFields()
	hc.cachedScoring = sort.CacheIsScore()
	hc.cachedDesc = sort.CacheDescending()

	return hc
}
// createSearchAfterDocument creates a dummy document to compare with for
// pagination. Its Sort values are the encoded 'after' values; its ID and
// Score are filled in from the entries whose sort requires the doc ID or the
// score respectively. A score entry that does not parse as a float is left
// at zero. 'after' must have at least as many entries as 'sort'.
func createSearchAfterDocument(sort search.SortOrder, after []string) *search.DocumentMatch {
	marker := &search.DocumentMatch{
		Sort: make([]string, len(after)),
	}

	for pos, ss := range sort {
		raw := after[pos]
		marker.Sort[pos] = encodeSearchAfter(ss, raw)

		if ss.RequiresDocID() {
			marker.ID = raw
		}
		if ss.RequiresScoring() {
			if score, err := strconv.ParseFloat(raw, 64); err == nil {
				marker.Score = score
			}
		}
	}

	return marker
}
// encodeSearchAfter applies prefix-coding to SearchAfter
// if required to enable pagination on numeric, datetime,
// and geo fields. Every other kind of sort gets the value back unchanged.
func encodeSearchAfter(ss search.SearchSort, after string) string {
	// First classify the sort, then encode accordingly.
	var asNumber, asDate bool
	switch s := ss.(type) {
	case *search.SortGeoDistance:
		asNumber = true
	case *search.SortField:
		// SortFieldAsString and SortFieldAuto fall through to the raw value.
		// NOTE: SortFieldAuto is used if you set Sort with a string
		// or if the type of the field is not set in the object
		// in the Sort slice. We cannot perform type inference in
		// this case, so we return the original string, even if
		// its actually numeric or date.
		asNumber = s.Type == search.SortFieldAsNumber
		asDate = s.Type == search.SortFieldAsDate
	}

	switch {
	case asNumber:
		f64, _ := strconv.ParseFloat(after, 64) // error checking in SearchRequest.Validate
		return string(numeric.MustNewPrefixCodedInt64(numeric.Float64ToInt64(f64), 0))
	case asDate:
		t, _ := time.Parse(time.RFC3339Nano, after) // error checking in SearchRequest.Validate
		return string(numeric.MustNewPrefixCodedInt64(t.UnixNano(), 0))
	default:
		// SortDocID, SortScore and string/auto field sorts
		return after
	}
}
// FilterHitsBySearchAfter filters document matches based on the SearchAfter
// field in the SearchRequest: only hits that sort strictly after the
// 'after' values are kept. Filtering happens in place, so the returned slice
// shares (and overwrites) the backing array of hits.
func FilterHitsBySearchAfter(hits []*search.DocumentMatch, sort search.SortOrder, after []string) []*search.DocumentMatch {
	if len(hits) == 0 {
		return hits
	}

	marker := createSearchAfterDocument(sort, after)
	isScore, isDesc := sort.CacheIsScore(), sort.CacheDescending()

	kept := hits[:0]
	for _, hit := range hits {
		if sort.Compare(isScore, isDesc, hit, marker) > 0 {
			kept = append(kept, hit)
		}
	}
	return kept
}
// getOptimalCollectorStore picks the store implementation for a collector
// that must keep size+skip hits: the slice-backed store for up to 10 hits
// and the heap-backed store beyond that.
func getOptimalCollectorStore(size, skip int, comparator collectorCompare) collectorStore {
	wanted := size + skip

	// pre-allocate space on the store to avoid reslicing
	// unless the size + skip is too large, then cap it
	// everything should still work, just reslices as necessary
	backingSize := wanted + 1
	if wanted > PreAllocSizeSkipCap {
		backingSize = PreAllocSizeSkipCap + 1
	}

	if wanted <= 10 {
		return newStoreSlice(backingSize, comparator)
	}
	return newStoreHeap(backingSize, comparator)
}
// Size estimates the memory footprint of the collector in bytes: the static
// struct size plus a pointer, the facets builder (if any), the needed field
// names and the two cached boolean slices.
func (hc *TopNCollector) Size() int {
	total := reflectStaticSizeTopNCollector + size.SizeOfPtr +
		len(hc.cachedScoring) + len(hc.cachedDesc)

	if hc.facetsBuilder != nil {
		total += hc.facetsBuilder.Size()
	}

	for i := range hc.neededFields {
		total += len(hc.neededFields[i]) + size.SizeOfString
	}

	return total
}
// Collect goes to the index to find the matching documents.
//
// It drains searcher, passing every (root) document through
// adjustDocumentMatch, prepareDocumentMatch and the document match handler,
// then processes any pending nested root and any kNN hits that were not
// merged, reports I/O stats, flushes the handler with a nil document and
// finalizes the results.
//
// The document match handler defaults to the one built by
// MakeTopNDocumentMatchHandler and can be replaced by storing a
// search.MakeDocumentMatchHandler in ctx under
// search.MakeDocumentMatchHandlerKey.
//
// It returns ctx.Err() when the context is found done (checked before the
// first document and then once every CheckDoneEvery documents), or the first
// error reported by the reader, the searcher, the nested store or the
// handler.
func (hc *TopNCollector) Collect(ctx context.Context, searcher search.Searcher, reader index.IndexReader) error {
	startTime := time.Now()
	var err error
	var next *search.DocumentMatch

	// pre-allocate enough space in the DocumentMatchPool
	// unless the size + skip is too large, then cap it
	// everything should still work, just allocates DocumentMatches on demand
	backingSize := hc.size + hc.skip + 1
	if hc.size+hc.skip > PreAllocSizeSkipCap {
		backingSize = PreAllocSizeSkipCap + 1
	}
	searchContext := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(backingSize+searcher.DocumentMatchPoolSize(), len(hc.sort)),
		Collector:         hc,
		IndexReader:       reader,
	}

	// doc values are read for the fields needed by sorting and faceting
	hc.dvReader, err = reader.DocValueReader(hc.neededFields)
	if err != nil {
		return err
	}

	// every visited doc value goes to the facets builder (when present)
	// and to the sort order
	hc.updateFieldVisitor = func(field string, term []byte) {
		if hc.facetsBuilder != nil {
			hc.facetsBuilder.UpdateVisitor(field, term)
		}
		hc.sort.UpdateVisitor(field, term)
	}

	// the application may override the handler maker through the context
	dmHandlerMaker := MakeTopNDocumentMatchHandler
	if cv := ctx.Value(search.MakeDocumentMatchHandlerKey); cv != nil {
		dmHandlerMaker = cv.(search.MakeDocumentMatchHandler)
	}
	// use the application given builder for making the custom document match
	// handler and perform callbacks/invocations on the newly made handler.
	dmHandler, loadID, err := dmHandlerMaker(searchContext)
	if err != nil {
		return err
	}

	// the handler may ask for external IDs to be loaded up-front
	hc.needDocIds = hc.needDocIds || loadID

	// bail out before touching the searcher if the context is already done
	select {
	case <-ctx.Done():
		search.RecordSearchCost(ctx, search.AbortM, 0)
		return ctx.Err()
	default:
		next, err = searcher.Next(searchContext)
	}

	// use a local totalDocs for counting total docs seen
	// for context deadline checking, as hc.total is only
	// incremented for actual(root) collected documents, and
	// we need to check deadline for every document seen (root or nested)
	var totalDocs uint64
	for err == nil && next != nil {
		if totalDocs%CheckDoneEvery == 0 {
			select {
			case <-ctx.Done():
				search.RecordSearchCost(ctx, search.AbortM, 0)
				return ctx.Err()
			default:
			}
		}
		totalDocs++

		if hc.nestedStore != nil {
			// This may be a nested document — add it to the nested store first.
			// If the nested store returns nil, the document was merged into its parent
			// and should not be processed further.
			// If it returns a non-nil document, it represents a complete root document
			// and should be processed further.
			next, err = hc.nestedStore.ProcessNestedDocument(searchContext, next)
			if err != nil {
				break
			}
		}

		if next != nil {
			err = hc.adjustDocumentMatch(searchContext, reader, next)
			if err != nil {
				break
			}

			err = hc.prepareDocumentMatch(searchContext, reader, next, false)
			if err != nil {
				break
			}

			err = dmHandler(next)
			if err != nil {
				break
			}
		}

		next, err = searcher.Next(searchContext)
	}
	// covers both errors that broke out of the loop and errors from Next
	if err != nil {
		return err
	}

	// if we have a nested store, we may have an interim root
	// that needs to be returned for processing
	if hc.nestedStore != nil {
		currRoot := hc.nestedStore.Current()
		if currRoot != nil {
			err = hc.adjustDocumentMatch(searchContext, reader, currRoot)
			if err != nil {
				return err
			}
			// no descendants at this point
			err = hc.prepareDocumentMatch(searchContext, reader, currRoot, false)
			if err != nil {
				return err
			}

			err = dmHandler(currRoot)
			if err != nil {
				return err
			}
		}
	}

	if hc.knnHits != nil {
		// we may have some knn hits left that did not match any of the top N tf-idf hits
		// we need to add them to the collector store to consider them as well.
		for _, knnDoc := range hc.knnHits {
			err = hc.prepareDocumentMatch(searchContext, reader, knnDoc, true)
			if err != nil {
				return err
			}
			err = dmHandler(knnDoc)
			if err != nil {
				return err
			}
		}
	}

	statsCallbackFn := ctx.Value(search.SearchIOStatsCallbackKey)
	if statsCallbackFn != nil {
		// hc.bytesRead corresponds to the
		// total bytes read as part of docValues being read every hit
		// which must be accounted by invoking the callback.
		statsCallbackFn.(search.SearchIOStatsCallbackFunc)(hc.bytesRead)

		search.RecordSearchCost(ctx, search.AddM, hc.bytesRead)
	}

	// help finalize/flush the results in case
	// of custom document match handlers.
	err = dmHandler(nil)
	if err != nil {
		return err
	}

	// compute search duration
	hc.took = time.Since(startTime)

	// finalize actual results
	err = hc.finalizeResults(reader)
	if err != nil {
		return err
	}
	return nil
}
// sortByScoreOpt is the shared Sort value assigned to every hit when the
// sort order consists of a single score-based entry, so that no per-hit sort
// values need to be computed. The slice is shared between hits and must not
// be modified.
var sortByScoreOpt = []string{"_score"}
// adjustDocumentMatch merges a pending kNN hit into d, if one exists for the
// same document. It is a no-op when no kNN hits were registered. Otherwise
// it loads d's external ID, and when a kNN hit with that ID is pending it
// invokes the hybrid merge callback and removes the hit from the pending
// set so that it is not processed again.
func (hc *TopNCollector) adjustDocumentMatch(ctx *search.SearchContext,
	reader index.IndexReader, d *search.DocumentMatch) (err error) {
	if hc.knnHits == nil {
		return nil
	}

	if d.ID, err = reader.ExternalID(d.IndexInternalID); err != nil {
		return err
	}

	knnHit, pending := hc.knnHits[d.ID]
	if !pending {
		return nil
	}

	// we have a knn hit corresponding to this document
	hc.hybridMergeCallback(d, knnHit)
	// it has been merged, so it must not be considered again
	delete(hc.knnHits, d.ID)
	return nil
}
// prepareDocumentMatch readies a hit for the document match handler: it
// visits the doc values needed for sorting/faceting, assigns the hit number,
// tracks the maximum score and, for non-kNN hits, loads the external ID when
// required and computes the hit's sort value.
//
// For kNN hits (isKnnDoc) only faceting fields are visited and no sort value
// is computed, because that was already done in the knn collector.
func (hc *TopNCollector) prepareDocumentMatch(ctx *search.SearchContext,
	reader index.IndexReader, d *search.DocumentMatch, isKnnDoc bool) (err error) {
	// visit field terms for features that require it (sort, facets)
	switch {
	case !isKnnDoc && len(hc.neededFields) > 0:
		err = hc.visitFieldTerms(reader, d, hc.updateFieldVisitor)
	case isKnnDoc && hc.facetsBuilder != nil:
		// we need to visit the field terms for the knn document
		// only for those fields that are required for faceting
		// and not for sorting. This is because the knn document's
		// sort value is already computed in the knn collector.
		facetsOnly := func(field string, term []byte) {
			if hc.facetsBuilder != nil {
				hc.facetsBuilder.UpdateVisitor(field, term)
			}
		}
		err = hc.visitFieldTerms(reader, d, facetsOnly)
	}
	if err != nil {
		return err
	}

	// increment total hits
	hc.total++
	d.HitNumber = hc.total

	// update max score
	if d.Score > hc.maxScore {
		hc.maxScore = d.Score
	}

	// early exit as the document match had its sort value calculated in the knn
	// collector itself
	if isKnnDoc {
		return nil
	}

	// see if we need to load ID (at this early stage, for example to sort on it)
	if hc.needDocIds && d.ID == "" {
		if d.ID, err = reader.ExternalID(d.IndexInternalID); err != nil {
			return err
		}
	}

	// compute this hit's sort value; a lone score sort shares one constant
	if len(hc.sort) == 1 && hc.cachedScoring[0] {
		d.Sort = sortByScoreOpt
		return nil
	}
	hc.sort.Value(d)
	return nil
}
// MakeTopNDocumentMatchHandler builds the default document match handler
// for a search context whose Collector is a *TopNCollector.
//
// The returned handler ignores a nil document, drops hits that do not sort
// strictly after the collector's search-after marker (when set), and
// otherwise offers the hit to the collector's store, which keeps at most
// size+skip hits. Dropped hits are returned to the DocumentMatchPool.
//
// The second return value (whether IDs must be loaded) is always false and
// the error is always nil.
//
// NOTE(review): when ctx.Collector is not a *TopNCollector this returns a
// nil handler together with a nil error; Collect would then invoke a nil
// function. Callers passing other collector types should check the handler.
func MakeTopNDocumentMatchHandler(
	ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
	var hc *TopNCollector
	var ok bool
	if hc, ok = ctx.Collector.(*TopNCollector); ok {
		return func(d *search.DocumentMatch) error {
			// a nil document signals the end of the search; nothing to flush
			if d == nil {
				return nil
			}

			// support search after based pagination,
			// if this hit is <= the search after sort key
			// we should skip it
			if hc.searchAfter != nil {
				// exact sort order matches use hit number to break tie
				// but we want to allow for exact match, so we pretend
				hc.searchAfter.HitNumber = d.HitNumber
				if hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d, hc.searchAfter) <= 0 {
					ctx.DocumentMatchPool.Put(d)
					return nil
				}
			}

			// optimization, we track lowest sorting hit already removed from heap
			// with this one comparison, we can avoid all heap operations if
			// this hit would have been added and then immediately removed
			if hc.lowestMatchOutsideResults != nil {
				cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc, d,
					hc.lowestMatchOutsideResults)
				if cmp >= 0 {
					// this hit can't possibly be in the result set, so avoid heap ops
					ctx.DocumentMatchPool.Put(d)
					return nil
				}
			}

			removed := hc.store.AddNotExceedingSize(d, hc.size+hc.skip)
			if removed != nil {
				if hc.lowestMatchOutsideResults == nil {
					// first eviction: it becomes the rejection threshold
					hc.lowestMatchOutsideResults = removed
				} else {
					cmp := hc.sort.Compare(hc.cachedScoring, hc.cachedDesc,
						removed, hc.lowestMatchOutsideResults)
					if cmp < 0 {
						// the evicted hit sorts ahead of the current threshold,
						// so it replaces it and the old one is recycled
						tmp := hc.lowestMatchOutsideResults
						hc.lowestMatchOutsideResults = removed
						ctx.DocumentMatchPool.Put(tmp)
					}
				}
			}
			return nil
		}, false, nil
	}
	return nil, false, nil
}
// visitFieldTerms is responsible for visiting the field terms of the
// search hit, and passing visited terms to the sort and facet builder.
//
// The visitor v receives the doc values of each of d's descendants first and
// then those of d itself. When a facets builder is set, the visit is
// bracketed by StartDoc/EndDoc. After the visit, the doc value reader's
// BytesRead() is added to hc.bytesRead.
//
// NOTE(review): on the early error returns (InternalID lookup or a
// descendant visit failing) EndDoc is not called and bytesRead is not
// updated — confirm this is acceptable since the search is being aborted.
func (hc *TopNCollector) visitFieldTerms(reader index.IndexReader, d *search.DocumentMatch, v index.DocValueVisitor) error {
	if hc.facetsBuilder != nil {
		hc.facetsBuilder.StartDoc()
	}
	if d.ID != "" && d.IndexInternalID == nil {
		// this document may have been sent over as preSearchData and
		// we need to look up the internal id to visit the doc values for it
		var err error
		d.IndexInternalID, err = reader.InternalID(d.ID)
		if err != nil {
			return err
		}
	}

	// first visit descendants if any
	for _, descID := range d.Descendants {
		err := hc.dvReader.VisitDocValues(descID, v)
		if err != nil {
			return err
		}
	}

	// now visit the doc values for this document
	err := hc.dvReader.VisitDocValues(d.IndexInternalID, v)
	if hc.facetsBuilder != nil {
		hc.facetsBuilder.EndDoc()
	}

	hc.bytesRead += hc.dvReader.BytesRead()

	// the error of the final visit (if any) is reported after bookkeeping
	return err
}
// SetFacetsBuilder registers a facet builder for this collector and makes
// sure every field the builder requires is part of the collector's needed
// fields, without introducing duplicates.
func (hc *TopNCollector) SetFacetsBuilder(facetsBuilder *search.FacetsBuilder) {
	hc.facetsBuilder = facetsBuilder

nextField:
	for _, field := range facetsBuilder.RequiredFields() {
		for _, existing := range hc.neededFields {
			if existing == field {
				// already tracked, move on to the next required field
				continue nextField
			}
		}
		hc.neededFields = append(hc.neededFields, field)
	}
}
// finalizeResults starts with the store containing the final top size+skip
// hits. It throws away the results to be skipped, does the final doc id
// lookup (if necessary) and marks every remaining hit complete. The outcome
// is stored in hc.results.
func (hc *TopNCollector) finalizeResults(r index.IndexReader) error {
	fixup := func(doc *search.DocumentMatch) error {
		if doc.ID == "" {
			// the external id has not been loaded yet, look it up now
			var lookupErr error
			if doc.ID, lookupErr = r.ExternalID(doc.IndexInternalID); lookupErr != nil {
				return lookupErr
			}
		}
		doc.Complete(nil)
		return nil
	}

	results, err := hc.store.Final(hc.skip, fixup)
	hc.results = results
	return err
}
// Results returns the collected hits. The value is populated when Collect
// finalizes its results.
func (hc *TopNCollector) Results() search.DocumentMatchCollection {
	return hc.results
}
// Total returns the total number of hits seen by the collector, which may
// exceed the number of hits returned by Results.
func (hc *TopNCollector) Total() uint64 {
	return hc.total
}
// MaxScore returns the maximum score seen across all the hits, including
// hits that did not make it into the results.
func (hc *TopNCollector) MaxScore() float64 {
	return hc.maxScore
}
// Took returns the time spent collecting hits, as measured by Collect.
func (hc *TopNCollector) Took() time.Duration {
	return hc.took
}
// FacetResults returns the computed facets results, or nil when no facets
// builder was registered.
func (hc *TopNCollector) FacetResults() search.FacetResults {
	if hc.facetsBuilder == nil {
		return nil
	}
	return hc.facetsBuilder.Results()
}
// SetKNNHits registers the kNN hits to be merged into the collected
// documents, indexed by hit ID, together with the callback used to merge a
// kNN hit into a matching document. Any previously registered hits are
// replaced.
func (hc *TopNCollector) SetKNNHits(knnHits search.DocumentMatchCollection, hybridMergeCallback search.HybridMergeCallbackFn) {
	byID := make(map[string]*search.DocumentMatch, len(knnHits))
	for i := range knnHits {
		byID[knnHits[i].ID] = knnHits[i]
	}
	hc.knnHits = byID
	hc.hybridMergeCallback = hybridMergeCallback
}
================================================
FILE: search/collector/topn_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package collector
import (
"bytes"
"context"
"strconv"
"testing"
"time"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/facet"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
func TestTop10Scores(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("a"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("b"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("c"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("d"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("e"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("f"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("g"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("h"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("i"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("j"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("k"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("l"),
Score: 99,
},
{
IndexInternalID: index.IndexInternalID("m"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("n"),
Score: 11,
},
},
}
collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 10 {
t.Logf("results: %v", results)
t.Fatalf("expected 10 results, got %d", len(results))
}
if results[0].ID != "l" {
t.Errorf("expected first result to have ID 'l', got %s", results[0].ID)
}
if results[0].Score != 99.0 {
t.Errorf("expected highest score to be 99.0, got %f", results[0].Score)
}
minScore := 1000.0
for _, result := range results {
if result.Score < minScore {
minScore = result.Score
}
}
if minScore < 10 {
t.Errorf("expected minimum score to be higher than 10, got %f", minScore)
}
}
func TestTop10ScoresSkip10(t *testing.T) {
// a stub search with more than 10 matches
// the top-10 scores are > 10
// everything else is less than 10
searcher := &stubSearcher{
matches: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("a"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("b"),
Score: 9.5,
},
{
IndexInternalID: index.IndexInternalID("c"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("d"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("e"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("f"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("g"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("h"),
Score: 9,
},
{
IndexInternalID: index.IndexInternalID("i"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("j"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("k"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("l"),
Score: 99,
},
{
IndexInternalID: index.IndexInternalID("m"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("n"),
Score: 11,
},
},
}
collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
}
maxScore := collector.MaxScore()
if maxScore != 99.0 {
t.Errorf("expected max score 99.0, got %f", maxScore)
}
total := collector.Total()
if total != 14 {
t.Errorf("expected 14 total results, got %d", total)
}
results := collector.Results()
if len(results) != 4 {
t.Fatalf("expected 4 results, got %d", len(results))
}
if results[0].ID != "b" {
t.Errorf("expected first result to have ID 'b', got %s", results[0].ID)
}
if results[0].Score != 9.5 {
t.Errorf("expected highest score to be 9.5, got %f", results[0].Score)
}
}
func TestTop10ScoresSkip10Only9Hits(t *testing.T) {
// a stub search with only 10 matches
searcher := &stubSearcher{
matches: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("a"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("c"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("e"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("g"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("i"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("j"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("k"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("m"),
Score: 11,
},
{
IndexInternalID: index.IndexInternalID("n"),
Score: 11,
},
},
}
collector := NewTopNCollector(10, 10, search.SortOrder{&search.SortScore{Desc: true}})
err := collector.Collect(context.Background(), searcher, &stubReader{})
if err != nil {
t.Fatal(err)
}
total := collector.Total()
if total != 9 {
t.Errorf("expected 9 total results, got %d", total)
}
results := collector.Results()
if len(results) != 0 {
t.Fatalf("expected 0 results, got %d", len(results))
}
}
// TestPaginationSameScores verifies that paging through hits which all share
// the same score never returns the same document on two different pages.
func TestPaginationSameScores(t *testing.T) {
	// each page is collected from a fresh stub search with more than
	// 10 matches ("a" through "n"); all documents have the same score
	newSearcher := func() *stubSearcher {
		const ids = "abcdefghijklmn"
		matches := make([]*search.DocumentMatch, 0, len(ids))
		for _, id := range ids {
			matches = append(matches, &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID(string(id)),
				Score:           5,
			})
		}
		return &stubSearcher{matches: matches}
	}

	// first get first 5 hits
	firstPage := NewTopNCollector(5, 0, search.SortOrder{&search.SortScore{Desc: true}})
	if err := firstPage.Collect(context.Background(), newSearcher(), &stubReader{}); err != nil {
		t.Fatal(err)
	}

	if total := firstPage.Total(); total != 14 {
		t.Errorf("expected 14 total results, got %d", total)
	}

	firstHits := firstPage.Results()
	if len(firstHits) != 5 {
		t.Fatalf("expected 5 results, got %d", len(firstHits))
	}

	// remember which documents were on the first page
	seen := make(map[string]struct{}, len(firstHits))
	for _, hit := range firstHits {
		seen[hit.ID] = struct{}{}
	}

	// now get next 5 hits
	secondPage := NewTopNCollector(5, 5, search.SortOrder{&search.SortScore{Desc: true}})
	if err := secondPage.Collect(context.Background(), newSearcher(), &stubReader{}); err != nil {
		t.Fatal(err)
	}

	if total := secondPage.Total(); total != 14 {
		t.Errorf("expected 14 total results, got %d", total)
	}

	secondHits := secondPage.Results()
	if len(secondHits) != 5 {
		t.Fatalf("expected 5 results, got %d", len(secondHits))
	}

	// make sure that none of these hits repeat ones we saw in the top 5
	for _, hit := range secondHits {
		if _, dup := seen[hit.ID]; dup {
			t.Errorf("doc ID %s is in top 5 and next 5 result sets", hit.ID)
		}
	}
}
// TestStreamResults verifies the search.DocumentMatchHandler: a custom
// handler installed through the context must see every hit, in the order
// the searcher produced them, and the collector itself must end up with no
// stored results because the custom handler never adds any.
func TestStreamResults(t *testing.T) {
	matches := []*search.DocumentMatch{
		{IndexInternalID: index.IndexInternalID("a"), Score: 11},
		{IndexInternalID: index.IndexInternalID("b"), Score: 1},
		{IndexInternalID: index.IndexInternalID("c"), Score: 11},
		{IndexInternalID: index.IndexInternalID("d"), Score: 999},
		{IndexInternalID: index.IndexInternalID("e"), Score: 11},
		{IndexInternalID: index.IndexInternalID("f"), Score: 9},
		{IndexInternalID: index.IndexInternalID("g"), Score: 11},
		{IndexInternalID: index.IndexInternalID("h"), Score: 89},
		{IndexInternalID: index.IndexInternalID("i"), Score: 101},
		{IndexInternalID: index.IndexInternalID("j"), Score: 112},
		{IndexInternalID: index.IndexInternalID("k"), Score: 10},
		{IndexInternalID: index.IndexInternalID("l"), Score: 99},
		{IndexInternalID: index.IndexInternalID("m"), Score: 11},
		{IndexInternalID: index.IndexInternalID("n"), Score: 111},
	}

	searcher := &stubSearcher{
		matches: matches,
	}

	// ind counts the hits delivered to the handler so far
	ind := 0
	docMatchHandler := func(hit *search.DocumentMatch) error {
		if hit == nil {
			return nil // search completed
		}
		// fail cleanly instead of indexing past the expected matches
		if ind >= len(matches) {
			t.Fatalf("received unexpected extra hit %s after %d expected hits",
				hit.IndexInternalID, len(matches))
		}
		if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) {
			t.Errorf("%d hit IndexInternalID actual: %s, expected: %s",
				ind, hit.IndexInternalID, matches[ind].IndexInternalID)
		}
		if hit.Score != matches[ind].Score {
			// report the scores that differ, not the IDs
			t.Errorf("%d hit Score actual: %f, expected: %f",
				ind, hit.Score, matches[ind].Score)
		}
		ind++
		return nil
	}

	var handlerMaker search.MakeDocumentMatchHandler = func(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
		return docMatchHandler, false, nil
	}

	ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey, handlerMaker)

	collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
	err := collector.Collect(ctx, searcher, &stubReader{})
	if err != nil {
		t.Fatal(err)
	}

	maxScore := collector.MaxScore()
	if maxScore != 999.0 {
		t.Errorf("expected max score 999.0, got %f", maxScore)
	}

	// the collector's total must match the number of hits the handler saw
	total := collector.Total()
	if int(total) != ind {
		t.Errorf("expected %d total results, got %d", ind, total)
	}

	// the custom handler never stores hits, so there is nothing to return
	results := collector.Results()
	if len(results) != 0 {
		t.Fatalf("expected 0 results, got %d", len(results))
	}
}
// TestCollectorChaining verifies the chaining of collectors.
// The custom DocumentMatchHandler can process every hit for
// the search query and then pass the hit to the topn collector
// to eventually have the sorted top `N` results.
func TestCollectorChaining(t *testing.T) {
	matches := []*search.DocumentMatch{
		{IndexInternalID: index.IndexInternalID("a"), Score: 11},
		{IndexInternalID: index.IndexInternalID("b"), Score: 1},
		{IndexInternalID: index.IndexInternalID("c"), Score: 11},
		{IndexInternalID: index.IndexInternalID("d"), Score: 999},
		{IndexInternalID: index.IndexInternalID("e"), Score: 11},
		{IndexInternalID: index.IndexInternalID("f"), Score: 9},
		{IndexInternalID: index.IndexInternalID("g"), Score: 11},
		{IndexInternalID: index.IndexInternalID("h"), Score: 89},
		{IndexInternalID: index.IndexInternalID("i"), Score: 101},
		{IndexInternalID: index.IndexInternalID("j"), Score: 112},
		{IndexInternalID: index.IndexInternalID("k"), Score: 10},
		{IndexInternalID: index.IndexInternalID("l"), Score: 99},
		{IndexInternalID: index.IndexInternalID("m"), Score: 11},
		{IndexInternalID: index.IndexInternalID("n"), Score: 111},
	}

	searcher := &stubSearcher{
		matches: matches,
	}

	// topNHandler is filled in by handlerMaker below; the custom handler
	// forwards every hit to it after performing its own checks.
	var topNHandler search.DocumentMatchHandler

	// ind counts the hits the handler has seen; it doubles as the index of
	// the stubbed match the next hit is expected to equal.
	ind := 0
	docMatchHandler := func(hit *search.DocumentMatch) error {
		if hit == nil {
			return nil // search completed
		}
		// Guard against indexing past the stubbed matches so an unexpected
		// extra hit is reported as a test failure rather than a panic.
		if ind >= len(matches) {
			t.Errorf("unexpected extra hit %s after %d expected hits",
				hit.IndexInternalID, len(matches))
			return nil
		}
		if !bytes.Equal(hit.IndexInternalID, matches[ind].IndexInternalID) {
			t.Errorf("%d hit IndexInternalID actual: %s, expected: %s",
				ind, hit.IndexInternalID, matches[ind].IndexInternalID)
		}
		if hit.Score != matches[ind].Score {
			t.Errorf("%d hit Score actual: %f, expected: %f",
				ind, hit.Score, matches[ind].Score)
		}
		ind++
		// give the hit back to the topN collector
		err := topNHandler(hit)
		if err != nil {
			t.Errorf("unexpected err: %v", err)
		}
		return nil
	}

	var handlerMaker search.MakeDocumentMatchHandler = func(ctx *search.SearchContext) (search.DocumentMatchHandler, bool, error) {
		// Propagate a failure to build the topN handler instead of
		// discarding it; Collect will then surface it to the test.
		var err error
		topNHandler, _, err = MakeTopNDocumentMatchHandler(ctx)
		if err != nil {
			return nil, false, err
		}
		return docMatchHandler, false, nil
	}

	ctx := context.WithValue(context.Background(), search.MakeDocumentMatchHandlerKey,
		handlerMaker)

	collector := NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
	err := collector.Collect(ctx, searcher, &stubReader{})
	if err != nil {
		t.Fatal(err)
	}

	maxScore := collector.MaxScore()
	if maxScore != 999.0 {
		t.Errorf("expected max score 999.0, got %f", maxScore)
	}

	// The collector's total must agree with the number of hits that went
	// through the custom handler.
	total := collector.Total()
	if int(total) != ind {
		t.Errorf("expected %d total results, got %d", ind, total)
	}

	results := collector.Results()
	if len(results) != 10 { // as it is paged
		t.Fatalf("expected 10 results, got %d", len(results))
	}

	if results[0].ID != "d" {
		t.Errorf("expected first result to have ID 'd', got %s", results[0].ID)
	}

	if results[0].Score != 999.0 {
		t.Errorf("expected highest score to be 999.0, got %f", results[0].Score)
	}

	// Find the lowest score among the retained results.
	minScore := 1000.0
	for _, result := range results {
		if result.Score < minScore {
			minScore = result.Score
		}
	}

	if minScore < 10 {
		t.Errorf("expected minimum score to be at least 10, got %f", minScore)
	}
}
// setupIndex creates a scorch index configured with an empty "path" and
// opens it. Any failure while constructing or opening the index aborts the
// calling test via t.Fatal.
func setupIndex(t *testing.T) index.Index {
	config := map[string]interface{}{
		"path": "",
	}

	idx, err := scorch.NewScorch(scorch.Name, config, index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}

	if openErr := idx.Open(); openErr != nil {
		t.Fatal(openErr)
	}

	return idx
}
// TestSetFacetsBuilder checks that attaching a facets builder whose facet
// field is already required for sorting does not add that field to the
// collector's neededFields a second time.
func TestSetFacetsBuilder(t *testing.T) {
	// Field common to both sorting and faceting.
	sortFacetsField := "locations"

	coll := NewTopNCollector(10, 0, search.SortOrder{&search.SortField{Field: sortFacetsField}})

	i := setupIndex(t)
	// Release the index when the test ends, as the other tests using
	// setupIndex do.
	defer func() {
		err := i.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		t.Fatal(err)
	}
	// Deferred after the index close so the reader is closed first.
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	fb := search.NewFacetsBuilder(indexReader)
	facetBuilder := facet.NewTermsFacetBuilder(sortFacetsField, 100)
	fb.Add("locations_facet", facetBuilder)
	coll.SetFacetsBuilder(fb)

	// Should not duplicate the "locations" field in the collector.
	if len(coll.neededFields) != 1 || coll.neededFields[0] != sortFacetsField {
		t.Errorf("expected fields in collector: %v, observed: %v", []string{sortFacetsField}, coll.neededFields)
	}
}
// TestSearchAfterNumeric indexes ten documents "a".."j" whose numeric "data"
// field holds 10 down to 1, then runs a search-after collector sorted by that
// field in descending order and positioned after the value "6". The five
// results must be the second half of the document list, in order.
func TestSearchAfterNumeric(t *testing.T) {
	idx := setupIndex(t)
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	type numericDoc struct {
		id   string
		data int64
	}
	docs := []numericDoc{
		{"a", 10},
		{"b", 9},
		{"c", 8},
		{"d", 7},
		{"e", 6},
		{"f", 5},
		{"g", 4},
		{"h", 3},
		{"i", 2},
		{"j", 1},
	}

	// Every document gets a single numeric field with the same options.
	fieldOpts := index.IndexField | index.StoreField | index.IncludeTermVectors
	batch := index.NewBatch()
	for _, d := range docs {
		doc := document.NewDocument(d.id)
		doc.AddField(document.NewNumericFieldWithIndexingOptions("data", []uint64{}, float64(d.data), fieldOpts))
		batch.Update(doc)
	}
	if batchErr := idx.Batch(batch); batchErr != nil {
		t.Fatal(batchErr)
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := reader.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	matchAll, err := searcher.NewMatchAllSearcher(context.Background(), reader, 1.0, search.SearcherOptions{})
	if err != nil {
		t.Fatal(err)
	}

	sortOrder := search.SortOrder{&search.SortField{Field: "data", Type: search.SortFieldAsNumber, Desc: true}}
	collectorAfter := NewTopNCollectorAfter(5, sortOrder, []string{"6"})
	if err = collectorAfter.Collect(context.Background(), matchAll, reader); err != nil {
		t.Fatal(err)
	}

	resultsAfter := collectorAfter.Results()
	if len(resultsAfter) != 5 {
		t.Fatalf("expected 5 results, got %d", len(resultsAfter))
	}

	// Result i is expected to be the document i places into the second half.
	offset := len(resultsAfter)
	for i, hit := range resultsAfter {
		if want := docs[i+offset].id; hit.ID != want {
			t.Errorf("expected result '%s', got '%s'", want, hit.ID)
		}
	}
}
// TestSearchAfterDateTime indexes ten documents "a".."j" whose date "data"
// field holds Unix seconds 10 down to 1 (UTC), then runs a search-after
// collector sorted by that field in descending order and positioned after
// the RFC3339Nano rendering of Unix second 6. The five results must be the
// second half of the document list, in order.
func TestSearchAfterDateTime(t *testing.T) {
	idx := setupIndex(t)
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	type dateDoc struct {
		id   string
		data time.Time
	}
	docs := []dateDoc{
		{"a", time.Unix(10, 0).UTC()},
		{"b", time.Unix(9, 0).UTC()},
		{"c", time.Unix(8, 0).UTC()},
		{"d", time.Unix(7, 0).UTC()},
		{"e", time.Unix(6, 0).UTC()},
		{"f", time.Unix(5, 0).UTC()},
		{"g", time.Unix(4, 0).UTC()},
		{"h", time.Unix(3, 0).UTC()},
		{"i", time.Unix(2, 0).UTC()},
		{"j", time.Unix(1, 0).UTC()},
	}

	// Every document gets a single date field with the same options.
	fieldOpts := index.IndexField | index.StoreField | index.IncludeTermVectors
	batch := index.NewBatch()
	for _, d := range docs {
		doc := document.NewDocument(d.id)
		field, fieldErr := document.NewDateTimeFieldWithIndexingOptions("data", []uint64{}, d.data, time.RFC3339Nano, fieldOpts)
		if fieldErr != nil {
			t.Fatal(fieldErr)
		}
		doc.AddField(field)
		batch.Update(doc)
	}
	if batchErr := idx.Batch(batch); batchErr != nil {
		t.Fatal(batchErr)
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := reader.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	matchAll, err := searcher.NewMatchAllSearcher(context.Background(), reader, 1.0, search.SearcherOptions{})
	if err != nil {
		t.Fatal(err)
	}

	sortOrder := search.SortOrder{&search.SortField{Field: "data", Type: search.SortFieldAsDate, Desc: true}}
	// The search-after key is the formatted timestamp of document "e".
	after := []string{time.Unix(6, 0).UTC().Format(time.RFC3339Nano)}
	collectorAfter := NewTopNCollectorAfter(5, sortOrder, after)
	if err = collectorAfter.Collect(context.Background(), matchAll, reader); err != nil {
		t.Fatal(err)
	}

	resultsAfter := collectorAfter.Results()
	if len(resultsAfter) != 5 {
		t.Fatalf("expected 5 results, got %d", len(resultsAfter))
	}

	// Result i is expected to be the document i places into the second half.
	offset := len(resultsAfter)
	for i, hit := range resultsAfter {
		if want := docs[i+offset].id; hit.ID != want {
			t.Errorf("expected result '%s', got '%s'", want, hit.ID)
		}
	}
}
// TestSearchAfterGeo indexes ten documents "a".."j" with geo points at
// longitudes 1 through 10 on latitude 0, then runs a search-after collector
// sorted by ascending distance from (0, 0) and positioned after the distance
// of document "e". The five results must be the second half of the document
// list, in order.
func TestSearchAfterGeo(t *testing.T) {
	idx := setupIndex(t)
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	type geoDoc struct {
		id  string
		lon float64
		lat float64
	}
	docs := []geoDoc{
		{"a", 1, 0},
		{"b", 2, 0},
		{"c", 3, 0},
		{"d", 4, 0},
		{"e", 5, 0},
		{"f", 6, 0},
		{"g", 7, 0},
		{"h", 8, 0},
		{"i", 9, 0},
		{"j", 10, 0},
	}

	// Every document gets a single geo point field with the same options.
	fieldOpts := index.IndexField | index.StoreField | index.IncludeTermVectors
	batch := index.NewBatch()
	for _, d := range docs {
		doc := document.NewDocument(d.id)
		doc.AddField(document.NewGeoPointFieldWithIndexingOptions("location", []uint64{}, d.lon, d.lat, fieldOpts))
		batch.Update(doc)
	}
	if batchErr := idx.Batch(batch); batchErr != nil {
		t.Fatal(batchErr)
	}

	reader, err := idx.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := reader.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	matchAll, err := searcher.NewMatchAllSearcher(context.Background(), reader, 1.0, search.SearcherOptions{})
	if err != nil {
		t.Fatal(err)
	}

	centerLon, centerLat := 0.0, 0.0
	sortOrder := search.SortOrder{&search.SortGeoDistance{Field: "location", Lon: centerLon, Lat: centerLat, Desc: false}}

	// search after doc "e" which has lon 5, lat 0; the factor of 1000
	// compensates for scaling of the sort value
	afterLon, afterLat := 5.0, 0.0
	afterDistance := geo.Haversin(centerLon, centerLat, afterLon, afterLat) * 1000
	after := []string{strconv.FormatFloat(afterDistance, 'f', -1, 64)}

	collectorAfter := NewTopNCollectorAfter(5, sortOrder, after)
	if err = collectorAfter.Collect(context.Background(), matchAll, reader); err != nil {
		t.Fatal(err)
	}

	resultsAfter := collectorAfter.Results()
	if len(resultsAfter) != 5 {
		t.Fatalf("expected 5 results, got %d", len(resultsAfter))
	}

	// Result i is expected to be the document i places into the second half.
	offset := len(resultsAfter)
	for i, hit := range resultsAfter {
		if want := docs[i+offset].id; hit.ID != want {
			t.Errorf("expected result '%s', got '%s'", want, hit.ID)
		}
	}
}
func BenchmarkTop10of0Scores(b *testing.B) {
benchHelper(0, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of3Scores(b *testing.B) {
benchHelper(3, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of10Scores(b *testing.B) {
benchHelper(10, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of25Scores(b *testing.B) {
benchHelper(25, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of50Scores(b *testing.B) {
benchHelper(50, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of10000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of0Scores(b *testing.B) {
benchHelper(0, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of3Scores(b *testing.B) {
benchHelper(3, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of10Scores(b *testing.B) {
benchHelper(10, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of25Scores(b *testing.B) {
benchHelper(25, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of50Scores(b *testing.B) {
benchHelper(50, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of10000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop1000of10000Scores(b *testing.B) {
benchHelper(10000, func() search.Collector {
return NewTopNCollector(1000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10000of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(10000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(10, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop100of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(100, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop1000of100000Scores(b *testing.B) {
benchHelper(100000, func() search.Collector {
return NewTopNCollector(1000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
func BenchmarkTop10000of1000000Scores(b *testing.B) {
benchHelper(1000000, func() search.Collector {
return NewTopNCollector(10000, 0, search.SortOrder{&search.SortScore{Desc: true}})
}, b)
}
================================================
FILE: search/collector.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"context"
"time"
index "github.com/blevesearch/bleve_index_api"
)
// Collector is the interface for types that gather the document matches of
// a search and expose the gathered outcome (results, totals, facets).
type Collector interface {
// Collect is handed the context, the Searcher and the index reader of a
// search and reports any failure as an error. Presumably it consumes the
// searcher's matches; confirm against the implementations.
Collect(ctx context.Context, searcher Searcher, reader index.IndexReader) error
// Results returns the collected matches as a DocumentMatchCollection.
Results() DocumentMatchCollection
// Total returns a count of matches. NOTE(review): presumably every match
// seen, not only those retained in Results; confirm.
Total() uint64
// MaxScore returns the maximum score. NOTE(review): presumably over all
// matches seen; confirm.
MaxScore() float64
// Took returns a duration. NOTE(review): presumably the time spent in
// Collect; confirm.
Took() time.Duration
// SetFacetsBuilder hands the collector the FacetsBuilder to use.
SetFacetsBuilder(facetsBuilder *FacetsBuilder)
// FacetResults returns the facet results.
FacetResults() FacetResults
}
// DocumentMatchHandler is the type of the document match callback
// that bleve invokes during a search.
// Bleve signals the completion of an ongoing search by invoking the
// callback with a nil document match.
// The application should take a copy of the hit/documentMatch
// if it wishes to own it or needs prolonged access to it.
type DocumentMatchHandler func(hit *DocumentMatch) error
// MakeDocumentMatchHandlerKeyType is the string-based type of the keys
// declared below.
type MakeDocumentMatchHandlerKeyType string
// MakeDocumentMatchHandlerKey is the key under which a
// MakeDocumentMatchHandler can be stored in a context.Context (via
// context.WithValue) that is then passed to a collector.
var MakeDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
"MakeDocumentMatchHandlerKey")
// MakeKNNDocumentMatchHandlerKey is the corresponding key for a
// MakeKNNDocumentMatchHandler. NOTE(review): presumably also used as a
// context key; confirm against the KNN collector.
var MakeKNNDocumentMatchHandlerKey = MakeDocumentMatchHandlerKeyType(
"MakeKNNDocumentMatchHandlerKey")
// MakeDocumentMatchHandler is an optional DocumentMatchHandler
// builder function which applications can pass to bleve.
// The builder gives bleve a DocumentMatchHandler function,
// which bleve will invoke on every document match.
// NOTE(review): loadID presumably tells the collector whether to load
// the external document ID for each hit; confirm against the collector.
type MakeDocumentMatchHandler func(ctx *SearchContext) (
callback DocumentMatchHandler, loadID bool, err error)
// MakeKNNDocumentMatchHandler is a DocumentMatchHandler builder function
// with the same shape as MakeDocumentMatchHandler minus the loadID result.
// NOTE(review): presumably used for KNN searches, as the name suggests;
// confirm against the KNN collector.
type MakeKNNDocumentMatchHandler func(ctx *SearchContext) (
callback DocumentMatchHandler, err error)
================================================
FILE: search/explanation.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"encoding/json"
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeExplanation holds the size in bytes of an Explanation
// value as reported by reflection; it is computed once at package init and
// used by (*Explanation).Size.
var reflectStaticSizeExplanation int

func init() {
	reflectStaticSizeExplanation = int(reflect.TypeOf(Explanation{}).Size())
}
// MergedExplMessage is the Message set on explanations created by merging
// other explanations; MergeWith also compares against it to recognise an
// explanation that is already the result of a merge.
const MergedExplMessage = "sum of merged explanations:"
// Explanation is a node in a tree describing a value: each node carries a
// numeric Value, a Message and optional child explanations. It serializes
// to JSON using the field tags below.
type Explanation struct {
// Value is the numeric value of this node; MergeWith sums the values of
// the explanations it merges.
Value float64 `json:"value"`
// Message is the text attached to this node.
Message string `json:"message"`
// PartialMatch is omitted from JSON when false.
// NOTE(review): presumably marks a partially matched clause; confirm.
PartialMatch bool `json:"partial_match,omitempty"`
// Children holds the nested explanations; omitted from JSON when empty.
Children []*Explanation `json:"children,omitempty"`
}
// String renders the explanation as indented JSON. If marshalling fails,
// a message describing the serialization error is returned instead.
func (expl *Explanation) String() string {
	encoded, err := json.MarshalIndent(expl, "", " ")
	if err == nil {
		return string(encoded)
	}
	return fmt.Sprintf("error serializing explanation to json: %v", err)
}
// Size returns an estimate, in bytes, of the memory used by the explanation:
// the static size of an Explanation, the size of a pointer, the length of
// Message, plus the recursively computed Size of every child.
func (expl *Explanation) Size() int {
	total := reflectStaticSizeExplanation + size.SizeOfPtr + len(expl.Message)
	for i := range expl.Children {
		total += expl.Children[i].Size()
	}
	return total
}
// MergeWith merges expl and other into a single explanation whose Value is
// the sum of the two values.
//
//   - If expl is nil, other is returned; if other is nil or is expl itself,
//     expl is returned unchanged.
//   - If both are merged explanations (Message equals MergedExplMessage),
//     other's children are appended to expl, which is updated and returned.
//   - If exactly one of them is a merged explanation, the non-merged one is
//     appended to its children and the merged one is updated and returned.
//   - Otherwise a new merged explanation is created with the two
//     explanations as its children.
//
// A merged explanation passed in may therefore be modified in place.
func (expl *Explanation) MergeWith(other *Explanation) *Explanation {
	switch {
	case expl == nil:
		return other
	case other == nil, expl == other:
		return expl
	}

	sum := expl.Value + other.Value
	explIsMerged := expl.Message == MergedExplMessage
	otherIsMerged := other.Message == MergedExplMessage

	switch {
	case explIsMerged && otherIsMerged:
		// both are merged explanations: fold other's children into expl
		expl.Value = sum
		expl.Children = append(expl.Children, other.Children...)
		return expl
	case explIsMerged:
		// only expl is merged: other becomes one more child of expl
		expl.Value = sum
		expl.Children = append(expl.Children, other)
		return expl
	case otherIsMerged:
		// only other is merged: expl becomes one more child of other
		other.Value = sum
		other.Children = append(other.Children, expl)
		return other
	default:
		// neither is merged: wrap both in a fresh merged explanation
		return &Explanation{
			Value:    sum,
			Message:  MergedExplMessage,
			Children: []*Explanation{expl, other},
		}
	}
}
================================================
FILE: search/facet/benchmark_data.txt
================================================
Boiling liquid expanding vapor explosion
From Wikipedia, the free encyclopedia
See also: Boiler explosion and Steam explosion
Flames subsequent to a flammable liquid BLEVE from a tanker. BLEVEs do not necessarily involve fire.
This article's tone or style may not reflect the encyclopedic tone used on Wikipedia. See Wikipedia's guide to writing better articles for suggestions. (July 2013)
A boiling liquid expanding vapor explosion (BLEVE, /ˈblɛviː/ blev-ee) is an explosion caused by the rupture of a vessel containing a pressurized liquid above its boiling point.[1]
Contents [hide]
1 Mechanism
1.1 Water example
1.2 BLEVEs without chemical reactions
2 Fires
3 Incidents
4 Safety measures
5 See also
6 References
7 External links
Mechanism[edit]
This section needs additional citations for verification. Please help improve this article by adding citations to reliable sources. Unsourced material may be challenged and removed. (July 2013)
There are three characteristics of liquids which are relevant to the discussion of a BLEVE:
If a liquid in a sealed container is boiled, the pressure inside the container increases. As the liquid changes to a gas it expands - this expansion in a vented container would cause the gas and liquid to take up more space. In a sealed container the gas and liquid are not able to take up more space and so the pressure rises. Pressurized vessels containing liquids can reach an equilibrium where the liquid stops boiling and the pressure stops rising. This occurs when no more heat is being added to the system (either because it has reached ambient temperature or has had a heat source removed).
The boiling temperature of a liquid is dependent on pressure - high pressures will yield high boiling temperatures, and low pressures will yield low boiling temperatures. A common simple experiment is to place a cup of water in a vacuum chamber, and then reduce the pressure in the chamber until the water boils. By reducing the pressure the water will boil even at room temperature. This works both ways - if the pressure is increased beyond normal atmospheric pressures, the boiling of hot water could be suppressed far beyond normal temperatures. The cooling system of a modern internal combustion engine is a real-world example.
When a liquid boils it turns into a gas. The resulting gas takes up far more space than the liquid did.
Typically, a BLEVE starts with a container of liquid which is held above its normal, atmospheric-pressure boiling temperature. Many substances normally stored as liquids, such as CO2, oxygen, and other similar industrial gases have boiling temperatures, at atmospheric pressure, far below room temperature. In the case of water, a BLEVE could occur if a pressurized chamber of water is heated far beyond the standard 100 °C (212 °F). That container, because the boiling water pressurizes it, is capable of holding liquid water at very high temperatures.
If the pressurized vessel, containing liquid at high temperature (which may be room temperature, depending on the substance) ruptures, the pressure which prevents the liquid from boiling is lost. If the rupture is catastrophic, where the vessel is immediately incapable of holding any pressure at all, then there suddenly exists a large mass of liquid which is at very high temperature and very low pressure. This causes the entire volume of liquid to instantaneously boil, which in turn causes an extremely rapid expansion. Depending on temperatures, pressures and the substance involved, that expansion may be so rapid that it can be classified as an explosion, fully capable of inflicting severe damage on its surroundings.
Water example[edit]
Imagine, for example, a tank of pressurized liquid water held at 204.4 °C (400 °F). This vessel would normally be pressurized to 1.7 MPa (250 psi) above atmospheric ("gauge") pressure. Were the tank containing the water to split open, there would momentarily exist a volume of liquid water which is
at atmospheric pressure, and
204.4 °C (400 °F).
At atmospheric pressure the boiling point of water is 100 °C (212 °F) - liquid water at atmospheric pressure cannot exist at temperatures higher than 100 °C (212 °F). It is obvious, then, that 204.4 °C (400 °F) liquid water at atmospheric pressure must immediately flash to gas causing an explosion.
BLEVEs without chemical reactions[edit]
It is important to note that a BLEVE need not be a chemical explosion - nor does there need to be a fire - however if a flammable substance is subject to a BLEVE it may also be subject to intense heating, either from an external source of heat which may have caused the vessel to rupture in the first place or from an internal source of localized heating such as skin friction. This heating can cause a flammable substance to ignite, adding a secondary explosion caused by the primary BLEVE. While blast effects of any BLEVE can be devastating, a flammable substance such as propane can add significantly to the danger.
Bleve explosion.svg
While the term BLEVE is most often used to describe the results of a container of flammable liquid rupturing due to fire, a BLEVE can occur even with a non-flammable substance such as water,[2] liquid nitrogen,[3] liquid helium or other refrigerants or cryogens, and therefore is not usually considered a type of chemical explosion.
Fires[edit]
BLEVEs can be caused by an external fire near the storage vessel causing heating of the contents and pressure build-up. While tanks are often designed to withstand great pressure, constant heating can cause the metal to weaken and eventually fail. If the tank is being heated in an area where there is no liquid, it may rupture faster without the liquid to absorb the heat. Gas containers are usually equipped with relief valves that vent off excess pressure, but the tank can still fail if the pressure is not released quickly enough.[1] Relief valves are sized to release pressure fast enough to prevent the pressure from increasing beyond the strength of the vessel, but not so fast as to be the cause of an explosion. An appropriately sized relief valve will allow the liquid inside to boil slowly, maintaining a constant pressure in the vessel until all the liquid has boiled and the vessel empties.
If the substance involved is flammable, it is likely that the resulting cloud of the substance will ignite after the BLEVE has occurred, forming a fireball and possibly a fuel-air explosion, also termed a vapor cloud explosion (VCE). If the materials are toxic, a large area will be contaminated.[4]
Incidents[edit]
The term "BLEVE" was coined by three researchers at Factory Mutual, in the analysis of an accident there in 1957 involving a chemical reactor vessel.[5]
In August 1959 the Kansas City Fire Department suffered its largest ever loss of life in the line of duty, when a 25,000 gallon (95,000 litre) gas tank exploded during a fire on Southwest Boulevard killing five firefighters. This was the first time BLEVE was used to describe a burning fuel tank.[citation needed]
Later incidents included the Cheapside Street Whisky Bond Fire in Glasgow, Scotland in 1960; Feyzin, France in 1966; Crescent City, Illinois in 1970; Kingman, Arizona in 1973; a liquid nitrogen tank rupture[6] at Air Products and Chemicals and Mobay Chemical Company at New Martinsville, West Virginia on January 31, 1978 [1];Texas City, Texas in 1978; Murdock, Illinois in 1983; San Juan Ixhuatepec, Mexico City in 1984; and Toronto, Ontario in 2008.
Safety measures[edit]
[icon] This section requires expansion. (July 2013)
Some fire mitigation measures are listed under liquefied petroleum gas.
See also[edit]
Boiler explosion
Expansion ratio
Explosive boiling or phase explosion
Rapid phase transition
Viareggio train derailment
2008 Toronto explosions
Gas carriers
Los Alfaques Disaster
Lac-Mégantic derailment
References[edit]
^ Jump up to: a b Kletz, Trevor (March 1990). Critical Aspects of Safety and Loss Prevention. London: Butterworth–Heinemann. pp. 43–45. ISBN 0-408-04429-2.
Jump up ^ "Temperature Pressure Relief Valves on Water Heaters: test, inspect, replace, repair guide". Inspect-ny.com. Retrieved 2011-07-12.
Jump up ^ Liquid nitrogen BLEVE demo
Jump up ^ "Chemical Process Safety" (PDF). Retrieved 2011-07-12.
Jump up ^ David F. Peterson, BLEVE: Facts, Risk Factors, and Fallacies, Fire Engineering magazine (2002).
Jump up ^ "STATE EX REL. VAPOR CORP. v. NARICK". Supreme Court of Appeals of West Virginia. 1984-07-12. Retrieved 2014-03-16.
External links[edit]
Look up boiling liquid expanding vapor explosion in Wiktionary, the free dictionary.
Wikimedia Commons has media related to BLEVE.
BLEVE Demo on YouTube — video of a controlled BLEVE demo
huge explosions on YouTube — video of propane and isobutane BLEVEs from a train derailment at Murdock, Illinois (3 September 1983)
Propane BLEVE on YouTube — video of BLEVE from the Toronto propane depot fire
Moscow Ring Road Accident on YouTube - Dozens of LPG tank BLEVEs after a road accident in Moscow
Kingman, AZ BLEVE — An account of the 5 July 1973 explosion in Kingman, with photographs
Propane Tank Explosions — Description of circumstances required to cause a propane tank BLEVE.
Analysis of BLEVE Events at DOE Sites - Details physics and mathematics of BLEVEs.
HID - SAFETY REPORT ASSESSMENT GUIDE: Whisky Maturation Warehouses - The liquor is aged in wooden barrels that can suffer BLEVE.
Categories: ExplosivesFirefightingFireTypes of fireGas technologiesIndustrial fires and explosions
Navigation menu
Create accountLog inArticleTalkReadEditView history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
Català
Deutsch
Español
Français
Italiano
עברית
Nederlands
日本語
Norsk bokmål
Polski
Português
Русский
Suomi
Edit links
This page was last modified on 18 November 2014 at 01:35.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policyAbout WikipediaDisclaimersContact WikipediaDevelopersMobile viewWikimedia Foundation Powered by MediaWiki
Thermobaric weapon
From Wikipedia, the free encyclopedia
Blast from a US Navy fuel air explosive used against a decommissioned ship, USS McNulty, 1972.
A thermobaric weapon is a type of explosive that utilizes oxygen from the surrounding air to generate an intense, high-temperature explosion, and in practice the blast wave such a weapon produces is typically significantly longer in duration than a conventional condensed explosive. The fuel-air bomb is one of the most well-known types of thermobaric weapons.
Most conventional explosives consist of a fuel-oxidizer premix (gunpowder, for example, contains 25% fuel and 75% oxidizer), whereas thermobaric weapons are almost 100% fuel, so thermobaric weapons are significantly more energetic than conventional condensed explosives of equal weight. Their reliance on atmospheric oxygen makes them unsuitable for use underwater, at high altitude, and in adverse weather. They do, however, cause considerably more destruction when used inside confined environments such as tunnels, caves, and bunkers - partly due to the sustained blast wave, and partly by consuming the available oxygen inside those confined spaces.
There are many different types of thermobaric weapons rounds that can be fitted to hand-held launchers.[1]
Contents [hide]
1 Terminology
2 Mechanism
2.1 Fuel-air explosive
2.1.1 Effect
3 Development history
3.1 Soviet and Russian developments
3.2 US developments
4 History
4.1 Military use
4.2 Non-military use
5 See also
6 References
7 External links
Terminology[edit]
The term thermobaric is derived from the Greek words for "heat" and "pressure": thermobarikos (θερμοβαρικός), from thermos (θερμός), hot + baros (βάρος), weight, pressure + suffix -ikos (-ικός), suffix -ic.
Other terms used for this family of weapons are high-impulse thermobaric weapons (HITs), heat and pressure weapons, vacuum bombs, or fuel-air explosives (FAE or FAX).
Mechanism[edit]
In contrast to condensed explosive, where oxidation in a confined region produces a blast front from essentially a point source, a flame front accelerates to a large volume producing pressure fronts both within the mixture of fuel and oxidant and then in the surrounding air.[2]
Thermobaric explosives apply the principles underlying accidental unconfined vapor cloud explosions, which include those from dispersions of flammable dusts and droplets.[3] Previously, such explosions were most often encountered in flour mills and their storage containers, and later in coal mines; but, now, most commonly in discharged oil tankers and refineries, including an incident at Buncefield in the UK in 2005 where the blast wave woke people 150 kilometres (93 mi) from its centre.[4]
A typical weapon consists of a container packed with a fuel substance, in the center of which is a small conventional-explosive "scatter charge". Fuels are chosen on the basis of the exothermicity of their oxidation, ranging from powdered metals, such as aluminium or magnesium, to organic materials, possibly with a self-contained partial oxidant. The most recent development involves the use of nanofuels.[5][6]
A thermobaric bomb's effective yield requires the most appropriate combination of a number of factors; among these are how well the fuel is dispersed, how rapidly it mixes with the surrounding atmosphere, and the initiation of the igniter and its position relative to the container of fuel. In some designs, strong munitions cases allow the blast pressure to be contained long enough for the fuel to be heated up well above its auto-ignition temperature, so that once the container bursts the super-heated fuel will auto-ignite progressively as it comes into contact with atmospheric oxygen.[7][8][9][10][11][12][13][14][15][16][17]
Conventional upper and lower limits of flammability apply to such weapons. Close in, blast from the dispersal charge, compressing and heating the surrounding atmosphere, will have some influence on the lower limit. The upper limit has been demonstrated strongly to influence the ignition of fogs above pools of oil.[18] This weakness may be eliminated by designs where the fuel is preheated well above its ignition temperature, so that its cooling during its dispersion still results in a minimal ignition delay on mixing. The continual combustion of the outer layer of fuel molecules as they come into contact with the air, generates additional heat which maintains the temperature of the interior of the fireball, and thus sustains the detonation.[19][20][21]
In confinement, a series of reflective shock waves are generated,[22][23] which maintain the fireball and can extend its duration to between 10 and 50 ms as exothermic recombination reactions occur.[24] Further damage can result as the gases cool and pressure drops sharply, leading to a partial vacuum. This effect has given rise to the misnomer "vacuum bomb". Piston-type afterburning is also believed to occur in such structures, as flame-fronts accelerate through it.[25][26]
Fuel-air explosive[edit]
A fuel-air explosive (FAE) device consists of a container of fuel and two separate explosive charges. After the munition is dropped or fired, the first explosive charge bursts open the container at a predetermined height and disperses the fuel in a cloud that mixes with atmospheric oxygen (the size of the cloud varies with the size of the munition). The cloud of fuel flows around objects and into structures. The second charge then detonates the cloud, creating a massive blast wave. The blast wave destroys unreinforced buildings and equipment and kills and injures people. The antipersonnel effect of the blast wave is more severe in foxholes, on people with body armor, and in enclosed spaces such as caves, buildings, and bunkers.
Fuel-air explosives were first developed, and used in Vietnam, by the United States. Soviet scientists, however, quickly developed their own FAE weapons, which were reportedly used against China in the Sino-Soviet border conflict and in Afghanistan. Since then, research and development has continued and currently Russian forces field a wide array of third-generation FAE warheads.
Effect[edit]
A Human Rights Watch report of 1 February 2000[27] quotes a study made by the US Defense Intelligence Agency:
The [blast] kill mechanism against living targets is unique–and unpleasant.... What kills is the pressure wave, and more importantly, the subsequent rarefaction [vacuum], which ruptures the lungs.... If the fuel deflagrates but does not detonate, victims will be severely burned and will probably also inhale the burning fuel. Since the most common FAE fuels, ethylene oxide and propylene oxide, are highly toxic, undetonated FAE should prove as lethal to personnel caught within the cloud as most chemical agents.
According to a U.S. Central Intelligence Agency study,[27] "the effect of an FAE explosion within confined spaces is immense. Those near the ignition point are obliterated. Those at the fringe are likely to suffer many internal, and thus invisible injuries, including burst eardrums and crushed inner ear organs, severe concussions, ruptured lungs and internal organs, and possibly blindness." Another Defense Intelligence Agency document speculates that because the "shock and pressure waves cause minimal damage to brain tissue…it is possible that victims of FAEs are not rendered unconscious by the blast, but instead suffer for several seconds or minutes while they suffocate."[28]
Development history[edit]
Soviet and Russian developments[edit]
A RPO-A rocket and launcher.
The Soviet armed forces extensively developed FAE weapons,[29] such as the RPO-A, and used them in Chechnya.[30]
The Russian armed forces have developed thermobaric ammunition variants for several of their weapons, such as the TGB-7V thermobaric grenade with a lethality radius of 10 metres (33 ft), which can be launched from a RPG-7. The GM-94 is a 43 mm pump-action grenade launcher which is designed mainly to fire thermobaric grenades for close quarters combat. With the grenade weighing 250 grams (8.8 oz) and holding a 160 grams (5.6 oz) explosive mixture, its lethality radius is 3 metres (9.8 ft); however, due to the deliberate "fragmentation-free" design of the grenade, 4 metres (13 ft) is already considered a safe distance.[31] The RPO-A and upgraded RPO-M are infantry-portable RPGs designed to fire thermobaric rockets. The RPO-M, for instance, has a thermobaric warhead with a TNT equivalence of 5.5 kilograms (12 lb) of TNT and destructive capabilities similar to a 152 mm High explosive fragmentation artillery shell.[32][33] The RShG-1 and the RShG-2 are thermobaric variants of the RPG-27 and RPG-26 respectively. The RShG-1 is the more powerful variant, with its warhead having a 10 metres (33 ft) lethality radius and producing about the same effect as 6 kg (13 lb) of TNT.[34] The RMG is a further derivative of the RPG-26 that uses a tandem-charge warhead, whereby the precursor HEAT warhead blasts an opening for the main thermobaric charge to enter and detonate inside.[35] The RMG's precursor HEAT warhead can penetrate 300 mm of reinforced concrete or over 100 mm of Rolled homogeneous armour, thus allowing the 105 millimetres (4.1 in) diameter thermobaric warhead to detonate inside.[36]
The other examples include the SACLOS or millimeter wave radar-guided thermobaric variants of the 9M123 Khrizantema, the 9M133F-1 thermobaric warhead variant of the 9M133 Kornet, and the 9M131F thermobaric warhead variant of the 9K115-2 Metis-M, all of which are anti-tank missiles. The Kornet has since been upgraded to the Kornet-EM, and its thermobaric variant has a maximum range of 10 kilometres (6.2 mi) and has the TNT equivalent of 7 kilograms (15 lb) of TNT.[37] The 300 mm 9M55S thermobaric cluster warhead rocket was built to be fired from the BM-30 Smerch MLRS. A dedicated carrier of thermobaric weapons is the purpose-built TOS-1, a 24-tube MLRS designed to fire 220 mm caliber thermobaric rockets. A full salvo from the TOS-1 will cover a rectangle 200x400 metres.[38] The Iskander-M theatre ballistic missile can also carry a 700 kilograms (1,500 lb) thermobaric warhead.[39]
The fireball blast from the Russian Air Force's FOAB, the largest Thermobaric device to be detonated.
Many Russian Air Force munitions also have thermobaric variants. The 80 mm S-8 rocket has the S-8DM and S-8DF thermobaric variants. The S-8's larger 122 mm brother, the S-13 rocket, has the S-13D and S-13DF thermobaric variants. The S-13DF's warhead weighs only 32 kg (71 lb) but its power is equivalent to 40 kg (88 lb) of TNT. The KAB-500-OD variant of the KAB-500KR has a 250 kg (550 lb) thermobaric warhead. The ODAB-500PM and ODAB-500PMV unguided bombs carry a 190 kg (420 lb) fuel-air explosive each. The KAB-1500S GLONASS/GPS guided 1,500 kg (3,300 lb) bomb also has a thermobaric variant. Its fireball will cover over a 150-metre (490 ft) radius and its lethality zone is a 500-metre (1,600 ft) radius.[40] The 9M120 Ataka-V and the 9K114 Shturm ATGMs both have thermobaric variants.
In September 2007 Russia exploded the largest thermobaric weapon ever made. The weapon's yield was reportedly greater than that of the smallest dial-a-yield nuclear weapons at their lowest settings.[41][42] Russia named this particular ordnance the "Father of All Bombs" in response to the United States developed "Massive Ordnance Air Blast" (MOAB) bomb whose backronym is the "Mother of All Bombs", and which previously held the accolade of the most powerful non-nuclear weapon in history.[43] The bomb contains an about 7 tons charge of a liquid fuel such as ethylene oxide, mixed with an energetic nanoparticle such as aluminium, surrounding a high explosive burster[44] that when detonated created an explosion equivalent to 44 metric tons of TNT.
US developments[edit]
A BLU-72/B bomb on a USAF A-1E taking off from Nakhon Phanom, in September 1968.
Current US FAE munitions include:
BLU-73 FAE I
BLU-95 500-lb (FAE-II)
BLU-96 2,000-lb (FAE-II)
CBU-55 FAE I
CBU-72 FAE I
The XM1060 40-mm grenade is a small-arms thermobaric device, which was delivered to U.S. forces in April 2003.[45] Since the 2003 Invasion of Iraq, the US Marine Corps has introduced a thermobaric 'Novel Explosive' (SMAW-NE) round for the Mk 153 SMAW rocket launcher. One team of Marines reported that they had destroyed a large one-story masonry type building with one round from 100 yards (91 m).[46]
The AGM-114N Hellfire II, first used by U.S. forces in 2003 in Iraq, uses a Metal Augmented Charge (MAC) warhead that contains a thermobaric explosive fill using fluoridated aluminium layered between the charge casing and a PBXN-112 explosive mixture. When the PBXN-112 detonates, the aluminium mixture is dispersed and rapidly burns. The resultant sustained high pressure is extremely effective against people and structures.[47]
History[edit]
Military use[edit]
US Navy BLU-118B being prepared for shipping for use in Afghanistan, 5 March 2002.
The first experiments with thermobaric weapon were conducted in Germany during World War II and were led by Mario Zippermayr. The German bombs used coal dust as fuel and were extensively tested in 1943 and 1944, but did not reach mass production before the war ended.
The TOS-1 system was test fired in Panjshir valley during Soviet war in Afghanistan in the early 1980s.[48]
Unconfirmed reports suggest that Russian military forces used ground delivered thermobaric weapons in the storming of the Russian parliament during the 1993 Russian constitutional crisis and also during the Battle for Grozny (first and second Chechen wars) to attack dug in Chechen fighters. The use of both TOS-1 heavy MLRS and "RPO-A Shmel" shoulder-fired rocket system in the Chechen wars is reported to have occurred.[48][49]
It is theorized that a multitude of hand-held thermobaric weapons were used by the Russian Armed Forces in their efforts to retake the school during the 2004 Beslan school hostage crisis. The RPO-A and either the TGB-7V thermobaric rocket from the RPG-7 or rockets from either the RShG-1 or the RShG-2 is claimed to have been used by the Spetsnaz during the initial storming of the school.[50][51][52] At least 3 and as many as 9 RPO-A casings were later found at the positions of the Spetsnaz.[53][54] The Russian Government later admitted to the use of the RPO-A during the crisis.[55]
According to UK Ministry of Defence, British military forces have also used thermobaric weapons in their AGM-114N Hellfire missiles (carried by Apache helicopters and UAVs) against the Taliban in the War in Afghanistan.[56]
The US military also used thermobaric weapons in Afghanistan. On 3 March 2002, a single 2,000 lb (910 kg) laser guided thermobaric bomb was used by the United States Army against cave complexes in which Al-Qaeda and Taliban fighters had taken refuge in the Gardez region of Afghanistan.[57][58] The SMAW-NE was used by the US Marines during the First Battle of Fallujah and Second Battle of Fallujah.
Reports by the rebel fighters of the Free Syrian Army claim the Syrian Air Force used such weapons against residential area targets occupied by the rebel fighters, as for instance in the Battle for Aleppo[59] and also in Kafar Batna.[60] A United Nations panel of human rights investigators reported that the Syrian government used thermobaric bombs against the rebellious town of Qusayr in March 2013.[61]
Non-military use[edit]
Thermobaric and fuel-air explosives have been used in guerrilla warfare since the 1983 Beirut barracks bombing in Lebanon, which used a gas-enhanced explosive mechanism, probably propane, butane or acetylene.[62] The explosive used by the bombers in the 1993 World Trade Center bombing incorporated the FAE principle, using three tanks of bottled hydrogen gas to enhance the blast.[63][64] Jemaah Islamiyah bombers used a shock-dispersed solid fuel charge,[65] based on the thermobaric principle,[66] to attack the Sari nightclub in the 2002 Bali bombings.[67]
See also[edit]
Bunker buster
Dust explosion
FOAB
Flame fougasse
MOAB
RPO-A
SMAW
References[edit]
Jump up ^ Algeria Isp (2011-10-18). "Libye – l'Otan utilise une bombe FAE | Politique, Algérie". Algeria ISP. Retrieved 2013-04-23.
Jump up ^ Nettleton, J. Occ. Accidents, 1, 149 (1976).
Jump up ^ Strehlow, 14th. Symp. (Int.) Comb. 1189, Comb. Inst. (1973).
Jump up ^ Health and Safety Environmental Agency, 5th. and final report, 2008.
Jump up ^ See Nanofuel/Oxidizers For Energetic Compositions – John D. Sullivan and Charles N. Kingery (1994) High explosive disseminator for a high explosive air bomb.
Jump up ^ Slavica Terzić, Mirjana Dakić Kolundžija, Milovan Azdejković and Gorgi Minov (2004) Compatibility Of Thermobaric Mixtures Based On Isopropyl Nitrate And Metal Powders.
Jump up ^ Meyer, Rudolf; Josef Köhler and Axel Homburg (2007). Explosives. Weinheim: Wiley-VCH. pp. 312. ISBN 3-527-31656-6. OCLC 165404124.
Jump up ^ Howard C. Hornig (1998) Non-focusing active warhead.
Jump up ^ Chris Ludwig (Talley Defense) Verifying Performance of Thermobaric Materials for Small to Medium Caliber Rocket Warheads.
Jump up ^ Martin M.West (1982) Composite high explosives for high energy blast applications.
Jump up ^ Raafat H. Guirguis (2005) Reactively Induced Fragmenting Explosives.
Jump up ^ Michael Dunning, William Andrews and Kevin Jaansalu (2005) The Fragmentation of Metal Cylinders Using Thermobaric Explosives.
Jump up ^ David L. Frost, Fan Zhang, Stephen B. Murray and Susan McCahan Critical Conditions For Ignition Of Metal Particles In A Condensed Explosive.
Jump up ^ The Army Doctrine and Training Bulletin (2001) The Threat from Blast Weapons.
Jump up ^ INTERNATIONAL DEFENCE REVIEW (2004) ENHANCED BLAST AND THERMOBARICS.
Jump up ^ F. Winterberg Conjectured Metastable Super-Explosives formed under High Pressure for Thermonuclear Ignition.
Jump up ^ Zhang, Fan (Medicine Hat, CA) Murray, Stephen Burke (Medicine Hat, CA) Higgins, Andrew (Montreal, CA) (2005) Super compressed detonation method and device to effect such detonation.
Jump up ^ Nettleton, arch. combust.,1,131, (1981).
Jump up ^ Stephen B. Murray Fundamental and Applied Studies of Fuel-Air Detonation.
Jump up ^ John H. Lee (1992) Chemical initiation of detonation in fuel-air explosive clouds.
Jump up ^ Frank E. Lowther (1989) Nuclear-sized explosions without radiation.
Jump up ^ Nettleton, Comb. and Flame, 24,65 (1975).
Jump up ^ Fire Prev. Sci. and Tech. No. 19,4 (1976)
Jump up ^ May L.Chan (2001) Advanced Thermobaric Explosive Compositions.
Jump up ^ New Thermobaric Materials and Weapon Concepts.
Jump up ^ Robert C. Morris (2003) Small Thermobaric Weapons An Unnoticed Threat.[dead link]
^ Jump up to: a b "Backgrounder on Russian Fuel Air Explosives ("Vacuum Bombs") | Human Rights Watch". Hrw.org. 2000-02-01. Retrieved 2013-04-23.
Jump up ^ Defense Intelligence Agency, "Future Threat to the Soldier System, Volume I; Dismounted Soldier--Middle East Threat", September 1993, p. 73. Obtained by Human Rights Watch under the U.S. Freedom of Information Act.
Jump up ^ "Press | Human Rights Watch". Hrw.org. 2008-12-27. Retrieved 2009-07-30.
Jump up ^ Lester W. Grau and Timothy L. Thomas(2000)"Russian Lessons Learned From the Battles For Grozny"
Jump up ^ "Modern Firearms – GM-94". World.guns.ru. 2011-01-24. Retrieved 2011-07-12.
Jump up ^ "New RPO Shmel-M Infantry Rocket Flamethrower Man-Packable Thermobaric Weapon". defensereview.com. 2006-07-19. Retrieved 2012-08-27.
Jump up ^ "Shmel-M: Infantry Rocket-assisted Flamethrower of Enhanced Range and Lethality". Kbptula.ru. Retrieved 2013-12-28.
Jump up ^ "Modern Firearms – RShG-1". World.guns.ru. 2011-01-24. Retrieved 2011-07-12.
Jump up ^ "Modern Firearms – RMG". World.guns.ru. 2011-01-24. Retrieved 2011-07-12.
Jump up ^ "RMG - A new Multi-Purpose Assault Weapon from Bazalt". defense-update.com. Retrieved 2012-08-27.
Jump up ^ "Kornet-EM: Multi-purpose Long-range Missile System". Kbptula.ru. Retrieved 2013-12-28.
Jump up ^ "TOS-1 Heavy flamethrower system". military-today.com. Retrieved 2012-08-27.
Jump up ^ "SS-26". Missilethreat.com. Retrieved 2013-12-28.
Jump up ^ Air Power Australia (2007-07-04). "How to Destroy the Australian Defence Force". Ausairpower.net. Retrieved 2011-07-12.
Jump up ^ "Russia unveils devastating vacuum bomb". ABC News. 2007. Retrieved 2007-09-12.
Jump up ^ "Video of test explosion". BBC News. 2007. Retrieved 2007-09-12.
Jump up ^ Harding, Luke (2007-09-12). "Russia unveils the father of all bombs". London: The Guardian. Retrieved 2007-09-12.
Jump up ^ Berhie, Saba. "Dropping the Big One | Popular Science". Popsci.com. Retrieved 2011-07-12.
Jump up ^ John Pike (2003-04-22). "XM1060 40mm Thermobaric Grenade". Globalsecurity.org. Retrieved 2011-07-12.
Jump up ^ David Hambling (2005) "Marines Quiet About Brutal New Weapon"
Jump up ^ John Pike (2001-09-11). "AGM-114N Metal Augmented Charge (MAC) Thermobaric Hellfire". Globalsecurity.org. Retrieved 2011-07-12.
^ Jump up to: a b John Pike. "TOS-1 Buratino 220mm Multiple Rocket Launcher". Globalsecurity.org. Retrieved 2013-04-23.
Jump up ^ "Foreign Military Studies Office Publications - A 'Crushing' Victory: Fuel-Air Explosives and Grozny 2000". Fmso.leavenworth.army.mil. Retrieved 2013-04-23.
Jump up ^ "Russian forces faulted in Beslan school tragedy". Christian Science Monitor. 1 September 2006. Retrieved 14 February 2007.
Jump up ^ Russia: Independent Beslan Investigation Sparks Controversy, The Jamestown Foundation, 29 August 2006
Jump up ^ Beslan still a raw nerve for Russia, BBC News, 1 September 2006
Jump up ^ ACHING TO KNOW, Los Angeles Times, 27 August 2005
Jump up ^ Searching for Traces of “Shmel” in Beslan School, Kommersant, 12 September 2005
Jump up ^ A Reversal Over Beslan Only Fuels Speculation, The Moscow Times, 21 July 2005
Jump up ^ "MoD's Controversial Thermobaric Weapons Use in Afghanistan". Armedforces-int.com. 2008-06-23. Retrieved 2013-04-23.
Jump up ^ "US Uses Bunker-Busting 'Thermobaric' Bomb for First Time". Commondreams.org. 2002-03-03. Retrieved 2013-04-23.
Jump up ^ John Pike. "BLU-118/B Thermobaric Weapon Demonstration / Hard Target Defeat Program". Globalsecurity.org. Retrieved 2013-04-23.
Jump up ^ "Syria rebels say Assad using 'mass-killing weapons' in Aleppo". October 10, 2012. Retrieved November 11, 2012.
Jump up ^ "Dropping Thermobaric Bombs on Residential Areas in Syria_ Nov. 5. 2012". First Post. November 11, 2012. Retrieved November 11, 2012.
Jump up ^ Cumming-Bruce, Nick (2013-06-04). "U.N. Panel Reports Increasing Brutality by Both Sides in Syria". The New York Times.
Jump up ^ Richard J. Grunawalt. Hospital Ships In The War On Terror: Sanctuaries or Targets? (PDF), Naval War College Review, Winter 2005, pp. 110–11.
Jump up ^ Paul Rogers (2000) "Politics in the Next 50 Years: The Changing Nature of International Conflict"
Jump up ^ J. Gilmore Childers, Henry J. DePippo (February 24, 1998). "Senate Judiciary Committee, Subcommittee on Technology, Terrorism, and Government Information hearing on "Foreign Terrorists in America: Five Years After the World Trade Center"". Fas.org. Retrieved 2011-07-12.
Jump up ^ P. Neuwald, H. Reichenbach, A. L. Kuhl (2003). "Shock-Dispersed-Fuel Charges-Combustion in Chambers and Tunnels".
Jump up ^ David Eshel (2006). "Is the world facing Thermobaric Terrorism?".[dead link]
Jump up ^ Wayne Turnbull (2003). "Bali:Preparations".
External links[edit]
Fuel/Air Explosive (FAE)
Thermobaric Explosive (Global Security)
Aspects of thermobaric weaponry (PDF) – Dr. Anna E Wildegger-Gaissmaier, Australian Defence Force Health
Thermobaric warhead for RPG-7
XM1060 40 mm Thermobaric Grenade (Global Security)
Defense Update: Fuel-Air Explosive Mine Clearing System
Foreign Military Studies Office – A 'Crushing' Victory: Fuel-Air Explosives and Grozny 2000
Soon to make a comeback in Afghanistan
Russia claims to have tested the most powerful "Vacuum" weapon
Categories: Explosive weaponsAmmunitionThermobaric weaponsAnti-personnel weapons
Navigation menu
Create accountLog inArticleTalkReadEditView history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
العربية
Беларуская
Български
Čeština
Deutsch
Español
فارسی
Français
हिन्दी
Italiano
עברית
Latviešu
Македонски
Nederlands
日本語
Polski
Русский
Suomi
Svenska
Türkçe
Українська
Tiếng Việt
粵語
中文
Edit links
This page was last modified on 28 November 2014 at 10:32.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policyAbout WikipediaDisclaimersContact WikipediaDevelopersMobile viewWikimedia Foundation Powered by MediaWiki
Gunpowder
From Wikipedia, the free encyclopedia
For other uses, see Gunpowder (disambiguation).
In American English, the term gunpowder also refers broadly to any gun propellant.[1] Gunpowder (black powder) as described in this article is not normally used in modern firearms, which instead use smokeless powders.
Black powder for muzzleloading rifles and pistols in FFFG granulation size. American Quarter (diameter 24 mm) for comparison.
Gunpowder, also known as black powder, is a chemical explosive—the earliest known. It is a mixture of sulfur, charcoal, and potassium nitrate (saltpeter). The sulfur and charcoal act as fuels, and the saltpeter is an oxidizer.[2][3] Because of its burning properties and the amount of heat and gas volume that it generates, gunpowder has been widely used as a propellant in firearms and as a pyrotechnic composition in fireworks.
Gunpowder is assigned the UN number UN0027 and has a hazard class of 1.1D. It has a flash point of approximately 427–464 °C (801–867 °F). The specific flash point may vary based on the specific composition of the gunpowder. Gunpowder's gravity is 1.70–1.82 (mercury method) or 1.92–2.08 (pycnometer), and it has a pH of 6.0–8.0. It is also considered to be an insoluble material.[4]
Gunpowder was, according to prevailing academic consensus, invented in the 9th century in China,[5][6] and the earliest record of a written formula for gunpowder appears in the 11th century Song Dynasty text, Wujing Zongyao.[7] This discovery led to the invention of fireworks and the earliest gunpowder weapons in China. In the centuries following the Chinese discovery, gunpowder weapons began appearing in the Muslim world, Europe, and India. The technology spread from China through the Middle East or Central Asia, and then into Europe.[8] The earliest Western accounts of gunpowder appear in texts written by English philosopher Roger Bacon in the 13th century.[9]
Gunpowder is classified as a low explosive because of its relatively slow decomposition rate and consequently low brisance. Low explosives deflagrate (i.e., burn) at subsonic speeds, whereas high explosives detonate, producing a supersonic wave. Gunpowder's burning rate increases with pressure, so it bursts containers if contained but otherwise just burns in the open. Ignition of the powder packed behind a bullet must generate enough pressure to force it from the muzzle at high speed, but not enough to rupture the gun barrel. Gunpowder thus makes a good propellant, but is less suitable for shattering rock or fortifications. Gunpowder was widely used to fill artillery shells and in mining and civil engineering to blast rock roughly until the second half of the 19th century, when the first high explosives (nitro-explosives) were discovered. Gunpowder is no longer used in modern explosive military warheads, nor is it used as main explosive in mining operations due to its cost relative to that of newer alternatives such as ammonium nitrate/fuel oil (ANFO).[10] Black powder is still used as a delay element in various munitions where its slow-burning properties are valuable.
Formulations used in blasting rock (such as in quarrying) are called blasting powder.
Contents [hide]
1 History
1.1 China
1.2 Middle East
1.3 Mainland Europe
1.4 Britain and Ireland
1.5 India
1.6 Indonesia
2 Manufacturing technology
3 Composition and characteristics
4 Serpentine
5 Corning
6 Modern types
7 Other types of gunpowder
8 Sulfur-free gunpowder
9 Combustion characteristics
9.1 Advantages
9.2 Disadvantages
9.3 Transportation
10 Other uses
11 See also
12 References
13 External links
History[edit]
Early Chinese rocket
A Mongol bomb thrown against a charging Japanese samurai during the Mongol invasions of Japan after founding the Yuan Dynasty, 1281.
Main article: History of gunpowder
Gunpowder was invented in China while Taoists attempted to create a potion of immortality. Chinese military forces used gunpowder-based weapons (i.e. rockets, guns, cannons) and explosives (i.e. grenades and different types of bombs) against the Mongols when the Mongols attempted to invade and breach city fortifications on China's northern borders. After the Mongols conquered China and founded the Yuan Dynasty, they used the Chinese gunpowder-based weapons technology in their attempted invasion of Japan; they also used gunpowder to fuel rockets.
The mainstream scholarly consensus is that gunpowder was invented in China, spread through the Middle East, and then into Europe,[8] although there is a dispute over how much the Chinese advancements in gunpowder warfare influenced later advancements in the Middle East and Europe.[11][12] The spread of gunpowder across Asia from China is widely attributed to the Mongols. One of the first examples of Europeans encountering gunpowder and firearms is at the Battle of Mohi in 1241. At this battle the Mongols not only used gunpowder in early Chinese firearms but in the earliest grenades as well.
A major problem confronting the study of the early history of gunpowder is ready access to sources close to the events described. Often enough, the first records potentially describing use of gunpowder in warfare were written several centuries after the fact, and may well have been colored by the contemporary experiences of the chronicler.[13] It is also difficult to accurately translate original alchemy texts, especially medieval Chinese texts that try to explain phenomena through metaphor, into modern scientific language with rigidly defined terminology. The translation difficulty has led to errors or loose interpretations bordering on artistic licence.[14][15] Early writings potentially mentioning gunpowder are sometimes marked by a linguistic process where old words acquired new meanings.[16] For instance, the Arabic word naft transitioned from denoting naphtha to denoting gunpowder, and the Chinese word pao evolved from meaning catapult to referring to cannon.[17] According to science and technology historian Bert S. Hall: "It goes without saying, however, that historians bent on special pleading, or simply with axes of their own to grind, can find rich material in these terminological thickets."[18]
China[edit]
Further information: Wujing Zongyao, Four Great Inventions and List of Chinese inventions
Chinese Ming Dynasty (1368-1644) matchlock firearms
Saltpeter was known to the Chinese by the mid-1st century AD and there is strong evidence of the use of saltpeter and sulfur in various largely medicinal combinations.[19] A Chinese alchemical text dated 492 noted saltpeter burnt with a purple flame, providing a practical and reliable means of distinguishing it from other inorganic salts, thus enabling alchemists to evaluate and compare purification techniques; the earliest Latin accounts of saltpeter purification are dated after 1200.[20]
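Yuan Dynasty bronze hand cannon from 1332.
The first mention of a mixture resembling gunpowder appeared in Taishang Shengzu Jindan Mijue by Qing Xuzi (c. 808); it describes mixing six parts sulfur to six parts saltpeter to one part birthwort herb (which would provide carbon).[21]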
The first reference to the incendiary properties of such mixtures is the passage of the Zhenyuan miaodao yaolüe, a Taoist text tentatively dated to the mid-9th century AD:[20] "Some have heated together sulfur, realgar and saltpeter with honey; smoke and flames result, so that their hands and faces have been burnt, and even the whole house where they were working burned down."[22] The Chinese word for "gunpowder" is Chinese: 火药/火藥; pinyin: huŏ yào /xuou yɑʊ/, which literally means "Fire Medicine";[23] however this name only came into use some centuries after the mixture's discovery.[24] During the 9th century, Taoist monks or alchemists searching for an elixir of immortality had serendipitously stumbled upon gunpowder.[8][25] The Chinese wasted little time in applying gunpowder to the development of weapons, and in the centuries that followed, they produced a variety of gunpowder weapons, including flamethrowers, rockets, bombs, and land mines, before inventing guns as a projectile weapon.[26] Archaeological evidence of a hand cannon has been excavated in Manchuria dated from the late 1200s[27] and the shells of explosive bombs have been discovered in a shipwreck off the shore of Japan dated from 1281, during the Mongol invasions of Japan.[28]
The Chinese "Wu Ching Tsung Yao" (Complete Essentials from the Military Classics), written by Tseng Kung-Liang between 1040–1044, provides encyclopedia references to a variety of mixtures that included petrochemicals—as well as garlic and honey. A slow match for flame throwing mechanisms using the siphon principle and for fireworks and rockets are mentioned. The mixture formulas in this book do not contain enough saltpeter to create an explosive however; being limited to at most 50% saltpeter, they produce an incendiary.[29] The Essentials was however written by a Song Dynasty court bureaucrat, and there's little evidence that it had any immediate impact on warfare; there is no mention of gunpowder use in the chronicles of the wars against the Tanguts in the eleventh century, and China was otherwise mostly at peace during this century. The first chronicled use of "fire spears" (or "fire lances") is at the siege of De'an in 1132.[30]
Formula for gunpowder in 1044 Wujing zongyao part I vol 12
Instruction for fire bomb in Wujing zongyao
Fire bomb
Fire grenade
Proto-cannon from the Ming Dynasty text Huolongjing
Land mine from the Ming Dynasty text Huolongjing
Fire arrow rocket launcher from the Wujing zongyao
Middle East[edit]
Main articles: Inventions in the Islamic world and Alchemy and chemistry in Islam
The Sultani Cannon, a very heavy bronze breech-loading cannon of type used by Ottoman Empire in the conquest of Constantinople, in 1453.
The Muslims acquired knowledge of gunpowder some time between 1240 and 1280, by which time the Syrian Hasan al-Rammah had written, in Arabic, recipes for gunpowder, instructions for the purification of saltpeter, and descriptions of gunpowder incendiaries. Gunpowder arrived in the Middle East, possibly through India, from China. This is implied by al-Rammah's usage of "terms that suggested he derived his knowledge from Chinese sources" and his references to saltpeter as "Chinese snow" Arabic: ثلج الصين thalj al-ṣīn, fireworks as "Chinese flowers" and rockets as "Chinese arrows".[31] However, because al-Rammah attributes his material to "his father and forefathers", al-Hassan argues that gunpowder became prevalent in Syria and Egypt by "the end of the twelfth century or the beginning of the thirteenth".[32] Persians called saltpeter "Chinese salt" [33][34][35][36][37] or "salt from Chinese salt marshes" (namak shūra chīnī Persian: نمک شوره چيني).[38][39]
A picture of a 15th-century Granadian cannon from the book Al-izz wal rifa'a.
Al-Hassan claims that in the Battle of Ain Jalut of 1260, the Mamluks used against the Mongols in "the first cannon in history" gunpowder formula with near-identical ideal composition ratios for explosive gunpowder.[32] Other historians urge caution regarding claims of Islamic firearms use in the 1204-1324 period as late medieval Arabic texts used the same word for gunpowder, naft, that they used for an earlier incendiary, naphtha.[13][17] Khan claims that it was invading Mongols who introduced gunpowder to the Islamic world[40] and cites Mamluk antagonism towards early musketeers in their infantry as an example of how gunpowder weapons were not always met with open acceptance in the Middle East.[41] Similarly, the refusal of their Qizilbash forces to use firearms contributed to the Safavid rout at Chaldiran in 1514.[41]
The earliest surviving documentary evidence for the use of the hand cannon, considered the oldest type of portable firearm and a forerunner of the handgun, are from several Arabic manuscripts dated to the 14th century.[42] Al-Hassan argues that these are based on earlier originals and that they report hand-held cannons being used by the Mamluks at the Battle of Ain Jalut in 1260.[32]
Hasan al-Rammah included 107 gunpowder recipes in his text al-Furusiyyah wa al-Manasib al-Harbiyya (The Book of Military Horsemanship and Ingenious War Devices), 22 of which are for rockets. If one takes the median of 17 of these 22 compositions for rockets (75% nitrates, 9.06% sulfur, and 15.94% carbon), it is nearly identical to the modern reported ideal gunpowder recipe of 75% potassium nitrate, 10% sulfur, and 15% carbon.[32]
The state-controlled manufacture of gunpowder by the Ottoman Empire through early supply chains to obtain nitre, sulfur and high-quality charcoal from oaks in Anatolia contributed significantly to its expansion between the 15th and 18th centuries. It was not until later in the 19th century when the syndicalist production of Turkish gunpowder was greatly reduced, which coincided with the decline of its military might.[43]
Mainland Europe[edit]
Several sources mention Chinese firearms and gunpowder weapons being deployed by the Mongols against European forces at the Battle of Mohi in 1241.[44][45][46] Professor Kenneth Warren Chase credits the Mongols for introducing into Europe gunpowder and its associated weaponry.[47]
C. F. Temler interprets Peter, Bishop of Leon, as reporting the use of cannons in Seville in 1248.[48]
In Europe, one of the first mentions of gunpowder use appears in a passage found in Roger Bacon's Opus Maius and Opus Tertium in what has been interpreted as being firecrackers. The most telling passage reads: "We have an example of these things (that act on the senses) in [the sound and fire of] that children's toy which is made in many [diverse] parts of the world; i.e., a device no bigger than one's thumb. From the violence of that salt called saltpeter [together with sulfur and willow charcoal, combined into a powder] so horrible a sound is made by the bursting of a thing so small, no more than a bit of parchment [containing it], that we find [the ear assaulted by a noise] exceeding the roar of strong thunder, and a flash brighter than the most brilliant lightning."[9] In the early 20th century, British artillery officer Henry William Lovett Hime proposed that another work tentatively attributed to Bacon, Epistola de Secretis Operibus Artis et Naturae, et de Nullitate Magiae contained an encrypted formula for gunpowder. This claim has been disputed by historians of science including Lynn Thorndike, John Maxson Stillman and George Sarton and by Bacon's editor Robert Steele, both in terms of authenticity of the work, and with respect to the decryption method.[9] In any case, the formula claimed to have been decrypted (7:5:5 saltpeter:charcoal:sulfur) is not useful for firearms use or even firecrackers, burning slowly and producing mostly smoke.[49][50]
Cannon forged in 1667 at the Fortín de La Galera, Nueva Esparta, Venezuela.
The Liber Ignium, or Book of Fires, attributed to Marcus Graecus, is a collection of incendiary recipes, including some gunpowder recipes. Partington dates the gunpowder recipes to approximately 1300.[51] One recipe for "flying fire" (ingis volatilis) involves saltpeter, sulfur, and colophonium, which, when inserted into a reed or hollow wood, "flies away suddenly and burns up everything." Another recipe, for artificial "thunder", specifies a mixture of one pound native sulfur, two pounds linden or willow charcoal, and six pounds of saltpeter.[52] Another specifies a 1:3:9 ratio.[52]
Some of the gunpowder recipes of De Mirabilibus Mundi of Albertus Magnus are identical to the recipes of the Liber Ignium, and according to Partington, "may have been taken from that work, rather than conversely."[53] Partington suggests that some of the book may have been compiled by Albert's students, "but since it is found in thirteenth century manuscripts, it may well be by Albert."[53] Albertus Magnus died in 1280.
A common German folk-tale is of the German priest/monk named Berthold Schwarz who independently invented gunpowder, thus earning it the German name Schwarzpulver or in English Schwarz's powder. Schwarz is also German for black so this folk-tale, while likely containing elements of truth, is considered problematic.
A major advance in manufacturing began in Europe in the late 14th century when the safety and thoroughness of incorporation was improved by wet grinding; liquid, such as distilled spirits or perhaps the urine of wine-drinking bishops[54] was added during the grinding-together of the ingredients and the moist paste dried afterwards. (The principle of wet mixing to prevent the separation of dry ingredients, invented for gunpowder, is used today in the pharmaceutical industry.[55]) It was also discovered that if the paste was rolled into balls before drying the resulting gunpowder absorbed less water from the air during storage and traveled better. The balls were then crushed in a mortar by the gunner immediately before use, with the old problem of uneven particle size and packing causing unpredictable results.
If the right size particles were chosen, however, the result was a great improvement in power. Forming the damp paste into corn-sized clumps by hand or with the use of a sieve instead of larger balls produced a product after drying that loaded much better, as each tiny piece provided its own surrounding air space that allowed much more rapid combustion than a fine powder. This "corned" gunpowder was from 30% to 300% more powerful. An example is cited where 34 pounds of serpentine was needed to shoot a 47 pound ball, but only 18 pounds of corned powder.[54] The optimum size of the grain depended on its use; larger for large cannon, finer for small arms. Larger cast cannons were easily muzzle-loaded with corned powder using a long-handled ladle. Corned powder also retained the advantage of low moisture absorption, as even tiny grains still had much less surface area to attract water than a floury powder.
During this time, European manufacturers also began regularly purifying saltpeter, using wood ashes containing potassium carbonate to precipitate calcium from their dung liquor, and using ox blood, alum, and slices of turnip to clarify the solution.[54]
Gunpowder-making and metal-smelting and casting for shot and cannon fee was closely held by skilled military tradesmen, who formed guilds that collected dues, tested apprentices, and gave pensions. "Fire workers" were also required to craft fireworks for celebrations of victory or peace. During the Renaissance, two European schools of pyrotechnic thought emerged, one in Italy and the other at Nuremberg, Germany. Vannoccio Biringuccio, born in 1480, was a member of the guild Fraternita di Santa Barbara but broke with the tradition of secrecy by setting down everything he knew in a book titled De la pirotechnia, written in vernacular. The first printed book on either gunpowder or metalworking, it was published posthumously in 1540, with 9 editions over 138 years, and also reprinted by MIT Press in 1966.[54] By the mid-17th century fireworks were used for entertainment on an unprecedented scale in Europe, being popular even at resorts and public gardens.[56]
In 1774 Louis XVI ascended to the throne of France at age 20. After he discovered that France was not self-sufficient in gunpowder, a Gunpowder Administration was established; to head it, the lawyer Antoine Lavoisier was appointed. Although from a bourgeois family, after his degree in law Lavoisier became wealthy from a company set up to collect taxes for the Crown; this allowed him to pursue experimental natural science as a hobby.[57]
Without access to cheap Indian saltpeter (controlled by the British), for hundreds of years France had relied on saltpetermen with royal warrants, the droit de fouille or "right to dig", to seize nitrous-containing soil and demolished walls of barnyards, without compensation to the owners.[58] This caused farmers, the wealthy, or entire villages to bribe the petermen and the associated bureaucracy to leave their buildings alone and the saltpeter uncollected. Lavoisier instituted a crash program to increase saltpeter production, revised (and later eliminated) the droit de fouille, researched best refining and powder manufacturing methods, instituted management and record-keeping, and established pricing that encouraged private investment in works. Although saltpeter from new Prussian-style putrefaction works had not been produced yet (the process taking about 18 months), in only a year France had gunpowder to export. A chief beneficiary of this surplus was the American Revolution. By careful testing and adjusting the proportions and grinding time, powder from mills such as at Essonne outside Paris became the best in the world by 1788, and inexpensive.[58] [59]
Britain and Ireland[edit]
The old Powder or Pouther magazine dating from 1642, built by order of Charles I. Irvine, North Ayrshire, Scotland
Gunpowder production in Britain appears to have started in the mid 14th century AD with the aim of supplying the English Crown.[60] Records show that gunpowder was being made, in England, in 1346, at the Tower of London; a powder house existed at the Tower in 1461; and in 1515 three King's gunpowder makers worked there.[60] Gunpowder was also being made or stored at other Royal castles, such as Portchester. By the early 14th century, according to N.J.G. Pounds's study The Medieval Castle in England and Wales, many English castles had been deserted and others were crumbling. Their military significance faded except on the borders. Gunpowder had made smaller castles useless.[61]
Henry VIII of England was short of gunpowder when he invaded France in 1544 and England needed to import gunpowder via the port of Antwerp in what is now Belgium.[60]
The English Civil War (1642–1645) led to an expansion of the gunpowder industry, with the repeal of the Royal Patent in August 1641.[60]
Two British physicists, Andrew Noble and Frederick Abel, worked to improve the properties of black powder during the late 19th century. This formed the basis for the Noble-Abel gas equation for internal ballistics.[62]
The introduction of smokeless powder in the late 19th century led to a contraction of the gunpowder industry. After the end of World War I, the majority of the United Kingdom gunpowder manufacturers merged into a single company, "Explosives Trades Limited"; and a number of sites were closed down, including those in Ireland. This company became Nobel Industries Limited; and in 1926 became a founding member of Imperial Chemical Industries. The Home Office removed gunpowder from its list of Permitted Explosives; and shortly afterwards, on 31 December 1931, the former Curtis & Harvey's Glynneath gunpowder factory at Pontneddfechan, in Wales, closed down, and it was demolished by fire in 1932.[63]
Gunpowder storing barrels at Martello tower in Point Pleasant Park
The last remaining gunpowder mill at the Royal Gunpowder Factory, Waltham Abbey was damaged by a German parachute mine in 1941 and it never reopened.[64] This was followed by the closure of the gunpowder section at the Royal Ordnance Factory, ROF Chorley, which was closed and demolished at the end of World War II; and of ICI Nobel's Roslin gunpowder factory, which closed in 1954.[64][65]
This left the sole United Kingdom gunpowder factory at ICI Nobel's Ardeer site in Scotland; it too closed in October 1976.[64] Since then gunpowder has been imported into the United Kingdom. In the late 1970s/early 1980s gunpowder was bought from eastern Europe, particularly from what was then the German Democratic Republic and former Yugoslavia.
India[edit]
In the year 1780 the British began to annex the territories of the Sultanate of Mysore, during the Second Anglo-Mysore War. The British battalion was defeated during the Battle of Guntur, by the forces of Hyder Ali, who effectively utilized Mysorean rockets and Rocket artillery against the closely massed British forces.
Mughal Emperor Shah Jahan, hunting deer using a Matchlock as the sun sets in the horizon.
Gunpowder and gunpowder weapons were transmitted to India through the Mongol invasions of India.[66][67] The Mongols were defeated by Alauddin Khilji of the Delhi Sultanate, and some of the Mongol soldiers remained in northern India after their conversion to Islam.[67] It was written in the Tarikh-i Firishta (1606–1607) that Nasir ud din Mahmud the ruler of the Delhi Sultanate presented the envoy of the Mongol ruler Hulegu Khan with a dazzling pyrotechnics display upon his arrival in Delhi in 1258 AD. Nasir ud din Mahmud tried to express his strength as a ruler and tried to ward off any Mongol attempt similar to the Siege of Baghdad (1258).[68] Firearms known as top-o-tufak also existed in many Muslim kingdoms in India by as early as 1366 AD.[68] From then on the employment of gunpowder warfare in India was prevalent, with events such as the "Siege of Belgaum" in 1473 by Sultan Muhammad Shah Bahmani.[69]
The shipwrecked Ottoman Admiral Seydi Ali Reis is known to have introduced the earliest type of Matchlock weapons, which the Ottomans used against the Portuguese during the Siege of Diu (1531). After that, a diverse variety of firearms, large guns in particular, became visible in Tanjore, Dacca, Bijapur, and Murshidabad.[70] Guns made of bronze were recovered from Calicut (1504), the former capital of the Zamorins.[71]
The Mughal Emperor Akbar mass-produced matchlocks for the Mughal Army. Akbar is personally known to have shot a leading Rajput commander during the Siege of Chittorgarh.[72] The Mughals began to use Bamboo rockets (mainly for signalling) and employ Sappers: special units that undermined heavy stone fortifications to plant gunpowder charges.
The Mughal Emperor Shah Jahan is known to have introduced much more advanced Matchlocks, their designs were a combination of Ottoman and Mughal designs. Shah Jahan also countered the British and other Europeans in his province of Gujarāt, which supplied Europe saltpeter for use in gunpowder warfare during the 17th century.[73] Bengal and Mālwa participated in saltpeter production.[73] The Dutch, French, Portuguese, and English used Chhapra as a center of saltpeter refining.[73]
Ever since the founding of the Sultanate of Mysore by Hyder Ali, French military officers were employed to train the Mysore Army. Hyder Ali and his son Tipu Sultan were the first to introduce modern Cannons and Muskets, their army was also the first in India to have official uniforms. During the Second Anglo-Mysore War Hyder Ali and his son Tipu Sultan unleashed the Mysorean rockets at their British opponents effectively defeating them on various occasions. The Mysorean rockets inspired the development of the Congreve rocket, which the British widely utilized during the Napoleonic Wars and the War of 1812.[74]
Indonesia[edit]
The Javanese Majapahit Empire was arguably able to encompass much of modern day Indonesia due to its unique mastery of bronze smithing and use of a central arsenal fed by a large number of cottage industries within the immediate region. Documentary and archeological evidence indicate that Arab or Indian traders introduced gunpowder, gonnes, muskets, blunderbusses, and cannons to the Javanese, Acehnese, and Batak via long established commercial trade routes around the early to mid 14th century CE.[75] Portuguese and Spanish invaders were unpleasantly surprised and occasionally even outgunned.[76] The resurgent Singhasari Empire overtook Sriwijaya and later emerged as the Majapahit whose warfare featured the use of fire-arms and cannonade.[77] Circa 1540 CE the Javanese, always alert for new weapons, found the newly arrived Portuguese weaponry superior to that of the locally made variants. Javanese bronze breech-loaded swivel-guns, known as meriam, or erroneously as lantaka, were used widely by the Majapahit navy as well as by pirates and rival lords. The demise of the Majapahit empire and the dispersal of disaffected skilled bronze cannon-smiths to Brunei, modern Sumatra, Malaysia and the Philippines led to widespread use, especially in the Makassar Strait.
Saltpeter harvesting was recorded by Dutch and German travelers as being common in even the smallest villages and was collected from the decomposition process of large dung hills specifically piled for the purpose. The Dutch punishment for possession of non-permitted gunpowder appears to have been amputation.[78] Ownership and manufacture of gunpowder was later prohibited by the colonial Dutch occupiers.[75] According to a colonel McKenzie quoted in Sir Thomas Stamford Raffles, The History of Java (1817), the purest sulfur was supplied from a crater from a mountain near the straits of Bali.[77]
Manufacturing technology[edit]
Edge-runner mill in a restored mill, at Eleutherian Mills
For the most powerful black powder meal, a wood charcoal is used. The best wood for the purpose is Pacific willow,[79] but others such as alder or buckthorn can be used. In Great Britain between the 15th and 19th centuries charcoal from alder buckthorn was greatly prized for gunpowder manufacture; cottonwood was used by the American Confederate States.[80] The ingredients are reduced in particle size and mixed as intimately as possible. Originally this was with a mortar-and-pestle or a similarly operating stamping-mill, using copper, bronze or other non-sparking materials, until supplanted by the rotating ball mill principle with non-sparking bronze or lead. Historically, a marble or limestone edge runner mill, running on a limestone bed was used in Great Britain; however, by the mid 19th century AD this had changed to either an iron shod stone wheel or a cast iron wheel running on an iron bed.[81] The mix was dampened with alcohol or water during grinding to prevent accidental ignition. This also helps the extremely soluble saltpeter mix into the microscopic nooks and crannies of the very high surface-area charcoal.
Around the late 14th century AD, European powdermakers first began adding liquid during grinding to improve mixing, reduce dust, and with it the risk of explosion.[82] The powder-makers would then shape the resulting paste of dampened gunpowder, known as mill cake, into corns, or grains, to dry. Not only did corned powder keep better because of its reduced surface area, gunners also found that it was more powerful and easier to load into guns. Before long, powder-makers standardized the process by forcing mill cake through sieves instead of corning powder by hand.
The improvement was based on reducing the surface area of a higher density composition. At the beginning of the 19th century, makers increased density further by static pressing. They shoveled damp mill cake into a two-foot square box, placed this beneath a screw press and reduced it to 1/2 its volume. "Presscake" had the hardness of slate. They broke the dried slabs with hammers or rollers, and sorted the granules with sieves into different grades. In the United States, Irenee du Pont, who had learned the trade from Lavoisier, tumbled the dried grains in rotating barrels to round the edges and increase durability during shipping and handling. (Sharp grains rounded off in transport, producing fine "meal dust" that changed the burning properties.)
Another advance was the manufacture of kiln charcoal by distilling wood in heated iron retorts instead of burning it in earthen pits. Controlling the temperature influenced the power and consistency of the finished gunpowder. In 1863, in response to high prices for Indian saltpeter, DuPont chemists developed a process using potash or mined potassium chloride to convert plentiful Chilean sodium nitrate to potassium nitrate.[83]
During the 18th century gunpowder factories became increasingly dependent on mechanical energy.[84] Despite mechanization, production difficulties related to humidity control, especially during the pressing, were still present in the late 19th century. A paper from 1885 laments that "Gunpowder is such a nervous and sensitive spirit, that in almost every process of manufacture it changes under our hands as the weather changes." Pressing times to the desired density could vary by a factor of three depending on the atmospheric humidity.[85]
Composition and characteristics[edit]
The term black powder was coined in the late 19th century, primarily in the United States, to distinguish prior gunpowder formulations from the new smokeless powders and semi-smokeless powders, in cases where these are not referred to as cordite. Semi-smokeless powders featured bulk volume properties that approximated black powder, but had significantly reduced amounts of smoke and combustion products. Smokeless powder has different burning properties (pressure vs. time) and can generate higher pressures and work per gram. This can rupture older weapons designed for black powder. Smokeless powders ranged in color from brownish tan to yellow to white. Most of the bulk semi-smokeless powders ceased to be manufactured in the 1920s.[86][87][88]
Black powder is a granular mixture of
a nitrate, typically potassium nitrate (KNO3), which supplies oxygen for the reaction;
charcoal, which provides carbon and other fuel for the reaction, simplified as carbon (C);
sulfur (S), which, while also serving as a fuel, lowers the temperature required to ignite the mixture, thereby increasing the rate of combustion.
Potassium nitrate is the most important ingredient in terms of both bulk and function because the combustion process releases oxygen from the potassium nitrate, promoting the rapid burning of the other ingredients.[89] To reduce the likelihood of accidental ignition by static electricity, the granules of modern black powder are typically coated with graphite, which prevents the build-up of electrostatic charge.
Charcoal does not consist of pure carbon; rather, it consists of partially pyrolyzed cellulose, in which the wood is not completely decomposed. Carbon differs from charcoal. Whereas charcoal's autoignition temperature is relatively low, carbon's is much greater. Thus, a black powder composition containing pure carbon would burn similarly to a match head, at best.[90]
The current standard composition for the black powders that are manufactured by pyrotechnicians was adopted as long ago as 1780. Proportions by weight are 75% potassium nitrate (known as saltpeter or saltpetre), 15% softwood charcoal, and 10% sulfur.[81] These ratios have varied over the centuries and by country, and can be altered somewhat depending on the purpose of the powder. For instance, power grades of black powder, unsuitable for use in firearms but adequate for blasting rock in quarrying operations, are called blasting powder rather than gunpowder with standard proportions of 70% nitrate, 14% charcoal, and 16% sulfur; blasting powder may be made with the cheaper sodium nitrate substituted for potassium nitrate and proportions may be as low as 40% nitrate, 30% charcoal, and 30% sulfur.[91] In 1857, Lammot du Pont solved the main problem of using cheaper sodium nitrate formulations when he patented DuPont "B" Blasting powder. After manufacturing grains from press-cake in the usual way, his process tumbled the powder with graphite dust for 12 hours. This formed a graphite coating on each grain that reduced its ability to absorb moisture.[92]
French war powder in 1879 used the ratio 75% saltpeter, 12.5% charcoal, 12.5% sulfur. English war powder in 1879 used the ratio 75% saltpeter, 15% charcoal, 10% sulfur.[93] The British Congreve rockets used 62.4% saltpeter, 23.2% charcoal and 14.4% sulfur, but the British Mark VII gunpowder was changed to 65% saltpeter, 20% charcoal and 15% sulfur.[94] The explanation for the wide variety in formulation relates to usage. Powder used for rocketry can use a slower burn rate since it accelerates the projectile for a much longer time—whereas powders for weapons such as flintlocks, cap-locks, or matchlocks need a higher burn rate to accelerate the projectile in a much shorter distance. Cannons usually used lower burn rate powders, because most would burst with higher burn rate powders.
Serpentine[edit]
The original dry-compounded powder used in fifteenth-century Europe was known as "Serpentine", either a reference to Satan[95] or to a common artillery piece that used it.[96] The ingredients were ground together with a mortar and pestle, perhaps for 24 hours,[96] resulting in a fine flour. Vibration during transportation could cause the components to separate again, requiring remixing in the field. Also if the quality of the saltpeter was low (for instance if it was contaminated with highly hygroscopic calcium nitrate), or if the powder was simply old (due to the mildly hygroscopic nature of potassium nitrate), in humid weather it would need to be re-dried. The dust from "repairing" powder in the field was a major hazard.
Loading cannons or bombards before the powder-making advances of the Renaissance was a skilled art. Fine powder loaded haphazardly or too tightly would burn incompletely or too slowly. Typically, the breech-loading powder chamber in the rear of the piece was filled only about half full, the serpentine powder neither too compressed nor too loose, a wooden bung pounded in to seal the chamber from the barrel when assembled, and the projectile placed on. A carefully determined empty space was necessary for the charge to burn effectively. When the cannon was fired through the touchhole, turbulence from the initial surface combustion caused the rest of the powder to be rapidly exposed to the flame.[96]
The advent of much more powerful and easy to use corned powder changed this procedure, but serpentine was used with older guns into the seventeenth century.[97]
Corning[edit]
For gunpowder to explode effectively, the combustible ingredients must be reduced to the smallest possible particle sizes, and mixed as thoroughly as possible. Once mixed, however, for better results in a gun, makers discovered that the final product should be in the form of individual, dense, grains that spread the fire quickly from grain to grain, much as straw or twigs catch fire more quickly than a pile of sawdust.
Primarily for safety reasons, size reduction and mixing is done while the ingredients are damp, usually with water. After 1800, instead of forming grains by hand or with sieves, the damp mill-cake was pressed in molds to increase its density and extract the liquid, forming press-cake. The pressing took varying amounts of time, depending on conditions such as atmospheric humidity. The hard, dense product was broken again into tiny pieces, which were separated with sieves to produce a uniform product for each purpose: coarse powders for cannons, finer grained powders for muskets, and the finest for small hand guns and priming.[97] Inappropriately fine-grained powder often caused cannons to burst before the projectile could move down the barrel, due to the high initial spike in pressure.[98] Mammoth powder with large grains made for Rodman's 15-inch cannon reduced the pressure to only 20 percent as high as ordinary cannon powder would have produced.[99]
In the mid-nineteenth century, measurements were made determining that the burning rate within a grain of black powder (or a tightly packed mass) is about 0.20 fps, while the rate of ignition propagation from grain to grain is around 30 fps, over two orders of magnitude faster.[97]
Modern types[edit]
Modern corning first compresses the fine black powder meal into blocks with a fixed density (1.7 g/cm³).[100] In the United States, gunpowder grains were designated F (for fine) or C (for coarse). Grain diameter decreased with a larger number of Fs and increased with a larger number of Cs, ranging from about 2 mm for 7F to 15 mm for 7C. Even larger grains were produced for artillery bore diameters greater than about 17 cm (6.7 in). The standard DuPont Mammoth powder developed by Thomas Rodman and Lammot du Pont for use during the American Civil War had grains averaging 0.6 inches diameter, with edges rounded in a glazing barrel.[99] Other versions had grains the size of golf and tennis balls for use in 20-inch (50-cm) Rodman guns.[101] In 1875 DuPont introduced Hexagonal powder for large artillery, which was pressed using shaped plates with a small center core—about 1.5 inches diameter, like a wagon wheel nut, the center hole widened as the grain burned.[102] By 1882 German makers also produced hexagonal grained powders of a similar size for artillery.[102]
By the late 19th century manufacturing focused on standard grades of black powder from Fg used in large bore rifles and shotguns, through FFg (medium and small-bore arms such as muskets and fusils), FFFg (small-bore rifles and pistols), and FFFFg (extreme small bore, short pistols and most commonly for priming flintlocks).[103] A coarser grade for use in military artillery blanks was designated A-1. These grades were sorted on a system of screens with oversize retained on a mesh of 6 wires per inch, A-1 retained on 10 wires per inch, Fg retained on 14, FFg on 24, FFFg on 46, and FFFFg on 60. Fines designated FFFFFg were usually reprocessed to minimize explosive dust hazards.[104] In the United Kingdom, the main service gunpowders were classified RFG (rifle grained fine) with diameter of one or two millimeters and RLG (rifle grained large) for grain diameters between two and six millimeters.[101] Gunpowder grains can alternatively be categorized by mesh size: the BSS sieve mesh size, being the smallest mesh size, which retains no grains. Recognized grain sizes are Gunpowder G 7, G 20, G 40, and G 90.
Owing to the large market of antique and replica black-powder firearms in the US, modern gunpowder substitutes like Pyrodex, Triple Seven and Black Mag3[105] pellets have been developed since the 1970s. These products, which should not be confused with smokeless powders, aim to produce less fouling (solid residue), while maintaining the traditional volumetric measurement system for charges. Claims of less corrosiveness of these products have been controversial, however. New cleaning products for black-powder guns have also been developed for this market.[103]
Other types of gunpowder[edit]
Besides black powder, there are other historically important types of gunpowder. "Brown gunpowder" is cited as composed of 79% nitre, 3% sulfur, and 18% charcoal per 100 of dry powder, with about 2% moisture. Prismatic Brown Powder is a large-grained product the Rottweil Company introduced in 1884 in Germany, which was adopted by the British Royal Navy shortly thereafter. The French navy adopted a fine, 3.1 millimeter, not prismatic grained product called Slow Burning Cocoa (SBC) or "cocoa powder". These brown powders reduced burning rate even further by using as little as 2 percent sulfur and using charcoal made from rye straw that had not been completely charred, hence the brown color.[102]
Lesmok powder was a product developed by DuPont in 1911,[106] one of several semi-smokeless products in the industry containing a mixture of black and nitrocellulose powder. It was sold to Winchester and others primarily for .22 and .32 small calibers. Its advantage was that it was believed at the time to be less corrosive than smokeless powders then in use. It was not understood in the U.S. until the 1920s that the actual source of corrosion was the potassium chloride residue from potassium chlorate sensitized primers. The bulkier black powder fouling better disperses primer residue. Failure to mitigate primer corrosion by dispersion caused the false impression that nitrocellulose-based powder caused corrosion.[107] Lesmok had some of the bulk of black powder for dispersing primer residue, but somewhat less total bulk than straight black powder, thus requiring less frequent bore cleaning.[108] It was last sold by Winchester in 1947.
Sulfur-free gunpowder[edit]
Burst barrel of a muzzle loader pistol replica, which was loaded with nitrocellulose powder instead of black powder and couldn't withstand the higher pressures of the modern propellant
The development of smokeless powders, such as cordite, in the late 19th century created the need for a spark-sensitive priming charge, such as gunpowder. However, the sulfur content of traditional gunpowders caused corrosion problems with Cordite Mk I and this led to the introduction of a range of sulfur-free gunpowders, of varying grain sizes.[64] They typically contain 70.5 parts of saltpeter and 29.5 parts of charcoal.[64] Like black powder, they were produced in different grain sizes. In the United Kingdom, the finest grain was known as sulfur-free mealed powder (SMP). Coarser grains were numbered as sulfur-free gunpowder (SFG n): 'SFG 12', 'SFG 20', 'SFG 40' and 'SFG 90', for example; where the number represents the smallest BSS sieve mesh size, which retained no grains.
Sulfur's main role in gunpowder is to decrease the ignition temperature. A sample reaction for sulfur-free gunpowder would be
6 KNO3 + C7H4O → 3 K2CO3 + 4 CO2 + 2 H2O + 3 N2
Combustion characteristics[edit]
A simple, commonly cited, chemical equation for the combustion of black powder is
2 KNO3 + S + 3 C → K2S + N2 + 3 CO2.
A balanced, but still simplified, equation is[109]
10 KNO3 + 3 S + 8 C → 2 K2CO3 + 3 K2SO4 + 6 CO2 + 5 N2.
Although charcoal's chemical formula varies, it can be best summed up by its empirical formula: C7H4O.
Therefore, an even more accurate equation of the decomposition of regular black powder with the use of sulfur can be described as:
6 KNO3 + C7H4O + 2 S → K2CO3 + K2SO4 + K2S + 4 CO2 + 2 CO + 2 H2O + 3 N2
Black powder without the use of sulfur:
10 KNO3 + 2 C7H4O → 5 K2CO3 + 4 CO2 + 5 CO + 4 H2O + 5 N2
The burning of gunpowder does not take place as a single reaction, however, and the byproducts are not easily predicted. One study's results showed that it produced (in order of descending quantities) 55.91% solid products: potassium carbonate, potassium sulfate, potassium sulfide, sulfur, potassium nitrate, potassium thiocyanate, carbon, ammonium carbonate and 42.98% gaseous products: carbon dioxide, nitrogen, carbon monoxide, hydrogen sulfide, hydrogen, methane, 1.11% water.
Black powder made with less-expensive and more plentiful sodium nitrate (in appropriate proportions) works just as well but is more hygroscopic than powders made from potassium nitrate—popularly known as saltpeter. Because corned black powder grains made with saltpeter are less affected by moisture in the air, they can be stored unsealed without degradation by humidity. Muzzleloaders have been known to fire after hanging on a wall for decades in a loaded state, provided they remained dry. By contrast, black powder made with sodium nitrate must be kept sealed to remain stable.
Gunpowder contains 3 megajoules per kilogram, and contains its own oxidant. For comparison, the energy density of TNT is 4.7 megajoules per kilogram, and the energy density of gasoline is 47.2 megajoules per kilogram. Gunpowder is a low explosive and as such it does not detonate; rather it deflagrates. Since it contains its own oxidizer and additionally burns faster under pressure, its combustion is capable of rupturing containers such as shell, grenade, or improvised "pipe bomb" or "pressure cooker" casings, forming shrapnel.
Advantages[edit]
In quarrying, high explosives are generally preferred for shattering rock. However, because of its low brisance, black powder causes fewer fractures and results in more usable stone compared to other explosives, making black powder useful for blasting monumental stone such as granite and marble. Black powder is well suited for blank rounds, signal flares, burst charges, and rescue-line launches. Black powder is also used in fireworks for lifting shells, in rockets as fuel, and in certain special effects.
Disadvantages[edit]
Black powder has a low energy density compared to modern "smokeless" powders, and thus to achieve high energy loadings, large amounts of black powder are needed with heavy projectiles. Black powder also produces thick smoke as a byproduct, which in military applications may give a soldier's location away to an enemy observer and may also impair aiming for additional shots.
Combustion converts less than half the mass of black powder to gas. The rest ends up as a thick layer of soot inside the barrel. In addition to being a nuisance, the residue from burnt black powder is hygroscopic and with the addition of moisture absorbed from the air, this residue forms a caustic substance. The soot contains potassium oxide or sodium oxide that turns into potassium hydroxide, or sodium hydroxide, which corrodes wrought iron or steel gun barrels. Black powder arms must be well cleaned both inside and out to remove the residue. The matchlock musket or pistol (an early gun ignition system), as well as the flintlock would often be unusable in wet weather, due to powder in the pan being exposed and dampened. Because of this unreliability, soldiers carrying muskets, known as musketeers, were armed with additional weapons such as swords or pikes. The bayonet was developed to allow the musket to be used as a pike, thus eliminating the need for the soldier to carry a secondary weapon.
Transportation[edit]
The United Nations Model Regulations on the Transportation of Dangerous Goods and national transportation authorities, such as United States Department of Transportation, have classified gunpowder (black powder) as a Group A: Primary explosive substance for shipment because it ignites so easily. Complete manufactured devices containing black powder are usually classified as Group D: Secondary detonating substance, or black powder, or article containing secondary detonating substance, such as firework, class D model rocket engine, etc., for shipment because they are harder to ignite than loose powder. As explosives, they all fall into the category of Class 1.
Other uses[edit]
Besides its use as an explosive, gunpowder has been occasionally employed for other purposes; after the Battle of Aspern-Essling (1809), the surgeon of the Napoleonic Army Larrey combated the lack of food for the wounded under his care by preparing a bouillon of horse meat seasoned with gunpowder for lack of salt.[110][111] It was also used for sterilizing on ships when there was no alcohol.
Jack Tars (British sailors) used gunpowder to create tattoos when ink wasn't available, by pricking the skin and rubbing the powder into the wound in a method known as traumatic tattooing.[112]
Christiaan Huygens experimented with gunpowder in 1673 in an early attempt to build an internal combustion engine, but he did not succeed. Modern attempts to recreate his invention were similarly unsuccessful.
Fireworks use gunpowder as lifting and burst charges, although sometimes other more powerful compositions are added to the burst charge to improve performance in small shells or provide a louder report. Most modern firecrackers no longer contain black powder.
Beginning in the 1930s, gunpowder or smokeless powder was used in rivet guns, stun guns for animals, cable splicers and other industrial construction tools.[113] The "stud gun" drove nails or screws into solid concrete, a function not possible with hydraulic tools. See Powder-actuated tool. Shotguns have been used to eliminate persistent material rings in operating rotary kilns (such as those for cement, lime, phosphate, etc.) and clinker in operating furnaces, and commercial tools make the method more reliable.[114]
Near London in 1853, Captain Shrapnel demonstrated a method for crushing gold-bearing ores by firing them from a cannon into an iron chamber, and "much satisfaction was expressed by all present". He hoped it would be useful on the goldfields of California and Australia. Nothing came of the invention, as continuously-operating crushing machines that achieved more reliable comminution were already coming into use.[115]
See also[edit]
Ballistics
Black powder substitute
Faversham explosives industry
Bulk loaded liquid propellants
Gunpowder magazine
Gunpowder Plot
Berthold Schwarz
Gunpowder warfare
History of gunpowder
Technology of the Song Dynasty
References[edit]
Jump up ^ http://www.merriam-webster.com/dictionary/gunpowder
Jump up ^ Jai Prakash Agrawal (2010). High Energy Materials: Propellants, Explosives and Pyrotechnics. Wiley-VCH. p. 69. ISBN 978-3-527-32610-5.
Jump up ^ David Cressy, Saltpeter: The Mother of Gunpowder (Oxford University Press, 2013)
Jump up ^ Owen Compliance Services. "Black Powder". Material Safety Data Sheet. Retrieved 31 August 2014.
Jump up ^ http://www.history.com/shows/ancient-discoveries/articles/who-built-it-first-2
Jump up ^ http://chemistry.about.com/od/historyofchemistry/a/gunpowder.htm
Jump up ^ Chase 2003:31 : "the earliest surviving formulas for gunpowder can be found in the Wujing zongyao, a military work from around 1040"
^ Jump up to: a b c Buchanan 2006, p. 2 "With its ninth century AD origins in China, the knowledge of gunpowder emerged from the search by alchemists for the secrets of life, to filter through the channels of Middle Eastern culture, and take root in Europe with consequences that form the context of the studies in this volume."
^ Jump up to: a b c Joseph Needham; Gwei-Djen Lu; Ling Wang (1987). Science and civilisation in China, Volume 5, Part 7. Cambridge University Press. pp. 48–50. ISBN 978-0-521-30358-3.
Jump up ^ Hazel Rossotti (2002). Fire: Servant, Scourge, and Enigma. Courier Dover Publications. pp. 132–137. ISBN 978-0-486-42261-9.
Jump up ^ Jack Kelly Gunpowder: Alchemy, Bombards, and Pyrotechnics: The History of the Explosive that Changed the World, Perseus Books Group: 2005, ISBN 0-465-03722-4, ISBN 978-0-465-03722-3: 272 pages
Jump up ^ St. C. Easton: "Roger Bacon and his Search for a Universal Science", Oxford (1962)
^ Jump up to: a b Gábor Ágoston (2005). Guns for the sultan: military power and the weapons industry in the Ottoman Empire. Cambridge University Press. p. 15. ISBN 978-0-521-84313-3.
Jump up ^ Ingham-Brown, George (1989) The Big Bang: A History of Explosives, Sutton Publishers, ISBN 0-7509-1878-0, ISBN 978-0-7509-1878-7, page vi
Jump up ^ Kelly, Jack (2005) Gunpowder: Alchemy, Bombards, and Pyrotechnics: The History of the Explosive that Changed the World, Perseus Books Group, ISBN 0-465-03722-4, ISBN 978-0-465-03722-3, page 22
Jump up ^ Bert S. Hall, "Introduction, 1999" pp. xvi–xvii to the reprinting of James Riddick Partington (1960). A history of Greek fire and gunpowder. JHU Press. ISBN 978-0-8018-5954-0.
^ Jump up to: a b Peter Purton (2009). A History of the Late Medieval Siege, 1200–1500. Boydell & Brewer. pp. 108–109. ISBN 978-1-84383-449-6.
Jump up ^ Bert S. Hall, "Introduction, 1999" p. xvii to the reprinting of James Riddick Partington (1960). A history of Greek fire and gunpowder. JHU Press. ISBN 978-0-8018-5954-0.
Jump up ^ Buchanan. "Editor's Introduction: Setting the Context", in Buchanan 2006.
^ Jump up to: a b Chase 2003:31–32
Jump up ^ Lorge, Peter A. (2008). The Asian military revolution, 1300-2000 : from gunpowder to the bomb (1. publ. ed.). Cambridge: Cambridge University Press. p. 32. ISBN 978-0-521-60954-8.
Jump up ^ Kelly 2004:4
Jump up ^ The Big Book of Trivia Fun, Kidsbooks, 2004
Jump up ^ Peter Allan Lorge (2008), The Asian military revolution: from gunpowder to the bomb, Cambridge University Press, p. 18, ISBN 978-0-521-60954-8
Jump up ^ Needham 1986, p. 7 "Without doubt it was in the previous century, around +850, that the early alchemical experiments on the constituents of gunpowder, with its self-contained oxygen, reached their climax in the appearance of the mixture itself."
Jump up ^ Chase 2003:1 "The earliest known formula for gunpowder can be found in a Chinese work dating probably from the 800s. The Chinese wasted little time in applying it to warfare, and they produced a variety of gunpowder weapons, including flamethrowers, rockets, bombs, and land mines, before inventing firearms."
Jump up ^ Chase 2003:1
Jump up ^ Delgado, James (February 2003). "Relics of the Kamikaze". Archaeology (Archaeological Institute of America) 56 (1).
Jump up ^ Chase 2003:31
Jump up ^ Peter Allan Lorge (2008), The Asian military revolution: from gunpowder to the bomb, Cambridge University Press, pp. 33–34, ISBN 978-0-521-60954-8
Jump up ^ Kelly 2004:22 'Around year 1240, Arabs acquired knowledge of saltpeter ("Chinese snow") from the East, perhaps through India. They knew of gunpowder soon afterward. They also learned about fireworks ("Chinese flowers") and rockets ("Chinese arrows"). Arab warriors had acquired fire lances before year 1280. Around that same year, a Syrian named Hasan al-Rammah wrote a book that, as he put it, "treats of machines of fire to be used for amusement or for useful purposes." He talked of rockets, fireworks, fire lances, and other incendiaries, using terms that suggested he derived his knowledge from Chinese sources. He gave instructions for the purification of saltpeter and recipes for making different types of gunpowder.'
^ Jump up to: a b c d Hassan, Ahmad Y. "Transfer of Islamic Technology to the West: Part III". History of Science and Technology in Islam.
Jump up ^ Peter Watson (2006). Ideas: A History of Thought and Invention, from Fire to Freud. HarperCollins. p. 304. ISBN 978-0-06-093564-1. The first use of a metal tube in this context was made around 1280 in the wars between the Song and the Mongols, where a new term, chong, was invented to describe the new horror...Like paper, it reached the West via the Muslims, in this case the writings of the Andalusian botanist Ibn al-Baytar, who died in Damascus in 1248. The Arabic term for saltpetre is 'Chinese snow' while the Persian usage is 'Chinese salt'.28
Jump up ^ Cathal J. Nolan (2006). The age of wars of religion, 1000–1650: an encyclopedia of global warfare and civilization. Volume 1 of Greenwood encyclopedias of modern world wars. Greenwood Publishing Group. p. 365. ISBN 0-313-33733-0. Retrieved 2011-11-28. In either case, there is linguistic evidence of Chinese origins of the technology: in Damascus, Arabs called the saltpeter used in making gunpowder " Chinese snow," while in Iran it was called "Chinese salt." Whatever the migratory route
Jump up ^ Oliver Frederick Gillilan Hogg (1970). Artillery: its origin, heyday, and decline. Archon Books. p. 123. The Chinese were certainly acquainted with saltpetre, the essential ingredient of gunpowder. They called it Chinese Snow and employed it early in the Christian era in the manufacture of fireworks and rockets.
Jump up ^ Oliver Frederick Gillilan Hogg (1963). English artillery, 1326–1716: being the history of artillery in this country prior to the formation of the Royal Regiment of Artillery. Royal Artillery Institution. p. 42. The Chinese were certainly acquainted with saltpetre, the essential ingredient of gunpowder. They called it Chinese Snow and employed it early in the Christian era in the manufacture of fireworks and rockets.
Jump up ^ Oliver Frederick Gillilan Hogg (1993). Clubs to cannon: warfare and weapons before the introduction of gunpowder (reprint ed.). Barnes & Noble Books. p. 216. ISBN 1-56619-364-8. Retrieved 2011-11-28. The Chinese were certainly acquainted with saltpetre, the essential ingredient of gunpowder. They called it Chinese snow and used it early in the Christian era in the manufacture of fireworks and rockets.
Jump up ^ Partington, J. R. (1960). A History of Greek Fire and Gunpowder (illustrated, reprint ed.). JHU Press. p. 335. ISBN 0801859549. Retrieved 2014-11-21.
Jump up ^ Needham, Joseph; Yu, Ping-Yu (1980). Needham, Joseph, ed. Science and Civilisation in China: Volume 5, Chemistry and Chemical Technology, Part 4, Spagyrical Discovery and Invention: Apparatus, Theories and Gifts. Volume 5 (Issue 4 of Science and Civilisation in China). Contributors Joseph Needham, Lu Gwei-Djen, Nathan Sivin (illustrated, reprint ed.). Cambridge University Press. p. 194. ISBN 052108573X. Retrieved 2014-11-21.
Jump up ^ Khan 1996
^ Jump up to: a b Khan 2004:6
Jump up ^ Ancient Discoveries, Episode 12: Machines of the East, History Channel, 2007 (Part 4 and Part 5)
Jump up ^ Nelson, Cameron Rubaloff (2010-07). Manufacture and transportation of gunpowder in the Ottoman Empire: 1400-1800 M.A. Thesis.
Jump up ^ William H. McNeill (1992). The Rise of the West: A History of the Human Community. University of Chicago Press. p. 492. ISBN 0-226-56141-0. Retrieved 29 July 2011.
Jump up ^ Michael Kohn (2006), Dateline Mongolia: An American Journalist in Nomad's Land, RDR Books, p. 28, ISBN 1-57143-155-1, retrieved 29 July 2011
Jump up ^ Robert Cowley (1993). Robert Cowley, ed. Experience of War (reprint ed.). Random House Inc. p. 86. ISBN 0-440-50553-4. Retrieved 29 July 2011.
Jump up ^ Kenneth Warren Chase (2003). Firearms: a global history to 1700 (illustrated ed.). Cambridge University Press. p. 58. ISBN 0-521-82274-2. Retrieved 29 July 2011.
Jump up ^ C. F. Temler, Historische Abhandlungen der Koniglichen Gesellschaft der Wissenschaften zu Kopenhagen ... ubersetzt ... von V. A. Heinze, Kiel, Dresden and Leipzig, 1782, i, 168, as cited in Partington, p. 228, footnote 6.
Jump up ^ Joseph Needham; Gwei-Djen Lu; Ling Wang (1987). Science and civilisation in China, Volume 5, Part 7. Cambridge University Press. p. 358. ISBN 978-0-521-30358-3.
Jump up ^ Bert S. Hall, "Introduction, 1999" p. xxiv to the reprinting of James Riddick Partington (1960). A history of Greek fire and gunpowder. JHU Press. ISBN 978-0-8018-5954-0.
Jump up ^ Partington 1960:60
^ Jump up to: a b Partington 1960:48–49, 54
^ Jump up to: a b Partington 1960:82–83
^ Jump up to: a b c d Kelly 2004, p.61
Jump up ^ Molerus, Otto. "History of Civilization in the Western Hemisphere from the Point of View of Particulate Technology, Part 2," Advanced Powder Technology 7 (1996): 161-66
Jump up ^ Microsoft Encarta Online Encyclopedia 2007 Archived 31 October 2009.
Jump up ^ In 1777 Lavoisier named oxygen, which had earlier been isolated by Priestley; the realization that saltpeter contained this substance was fundamental to understanding gunpowder.
^ Jump up to: a b Kelly 2004, p.164
Jump up ^ Metzner, Paul (1998), Crescendo of the Virtuoso: Spectacle, Skill, and Self-Promotion in Paris during the Age of Revolution, University of California Press
^ Jump up to: a b c d Cocroft 2000, "Success to the Black Art!". Chapter 1
Jump up ^ Ross, Charles. The Custom of the Castle: From Malory to Macbeth. Berkeley: University of California Press, c1997. [1] pages 131-130
Jump up ^ The Noble-Abel Equation of State: Thermodynamic Derivations for Ballistics Modelling
Jump up ^ Pritchard, Tom; Evans, Jack; Johnson, Sydney (1985), The Old Gunpowder Factory at Glynneath, Merthyr Tydfil: Merthyr Tydfil & District Naturalists' Society
^ Jump up to: a b c d e Cocroft 2000, "The demise of gunpowder". Chapter 4
Jump up ^ MacDougall, Ian (2000). 'Oh, ye had to be careful' : personal recollections by Roslin gunpowder mill and bomb factory workers. East Linton, Scotland: Tuckwell Press in association with the European Ethnological Research Centre and the Scottish Working People's History Trust. ISBN 1-86232-126-4.
Jump up ^ Iqtidar Alam Khan (2004). Gunpowder And Firearms: Warfare In Medieval India. Oxford University Press. ISBN 978-0-19-566526-0.
^ Jump up to: a b Iqtidar Alam Khan (25 April 2008). Historical Dictionary of Medieval India. Scarecrow Press. p. 157. ISBN 978-0-8108-5503-8.
^ Jump up to: a b Khan 2004:9–10
Jump up ^ Khan 2004:10
Jump up ^ Partington (Johns Hopkins University Press edition, 1999), 225
Jump up ^ Partington (Johns Hopkins University Press edition, 1999), 226
Jump up ^ http://www.youtube.com/watch?v=DTfEDaWMj4o
^ Jump up to: a b c "India." Encyclopædia Britannica. Encyclopedia Britannica 2008 Ultimate Reference Suite. Chicago: Encyclopedia Britannica, 2008.
Jump up ^ "rocket and missile system." Encyclopædia Britannica. Encyclopædia Britannica 2008 Ultimate Reference Suite. Chicago: Encyclopædia Britannica, 2008.
^ Jump up to: a b Dipanegara, P. B. R. Carey, Babad Dipanagara: an account of the outbreak of the Java war, 1825-30 : the Surakarta court version of the Babad Dipanagara with translations into English and Indonesian volume 9: Council of the M.B.R.A.S. by Art Printing Works: 1981.
Jump up ^ Atsushi, Ota (2006). Changes of regime and social dynamics in West Java : society, state, and the outer world of Banten, 1750-1830. Leiden: Brill. ISBN 90-04-15091-9.
^ Jump up to: a b Thomas Stamford Raffles, The History of Java, Oxford University Press, 1965 (originally published in 1817), ISBN 0-19-580347-7
Jump up ^ Raffles, Thomas Stamford (1978). The History of Java ([Repr.]. ed.). Kuala Lumpur: Oxford University Press. ISBN 0-19-580347-7.
Jump up ^ US Department of Agriculture (1917). Department Bulletin No. 316: Willows: Their growth, use, and importance. The Department. p. 31.
Jump up ^ Kelly 2004, p.200
^ Jump up to: a b Earl 1978, Chapter 2: The Development of Gunpowder
Jump up ^ Kelly 2004:60–63
Jump up ^ Kelly 2004, p.199
Jump up ^ Frangsmyr, Tore, J. L. Heilbron, and Robin E. Rider, editors The Quantifying Spirit in the Eighteenth Century. Berkeley: University of California Press, c1990. http://ark.cdlib.org/ark:/13030/ft6d5nb455/ p. 292.
Jump up ^ C.E. Munroe (1885) "Notes on the literature of explosives no. VIII", Proceedings of the US Naval Institute, no. XI, p. 285
Jump up ^ The History of the 10.4×38 Swiss Cartridge
Jump up ^ Blackpowder to Pyrodex and Beyond by Randy Wakeman at Chuck Hawks
Jump up ^ The History and Art of Shotshells by Jon Farrar, Nebraskaland Magazine
Jump up ^ Buchanan. "Editor's Introduction: Setting the Context", in Buchanan 2006, p. 4.
Jump up ^ Black Powder Recipes, Ulrich Bretscher
Jump up ^ Julian S. Hatcher, Hatcher's Notebook, Military Service Publishing Company, 1947. Chapter XIII Notes on Gunpowder, pages 300-305.
Jump up ^ Kelly 2004, p.218
Jump up ^ Book title Workshop Receipts Publisher William Clowes and Son limited Author Ernest Spon. Date 1 August 1873.
Jump up ^ GunpowderTranslation. Academic. Retrieved 2014-08-31.
Jump up ^ Cathal J. Nolan (2006), The age of wars of religion, 1000-1650: an encyclopedia of global warfare and civilization, Greenwood Publishing Group, p. 365, ISBN 978-0-313-33733-8
^ Jump up to: a b c Kelly 2004, p58
^ Jump up to: a b c John Francis Guilmartin (2003). Gunpowder & galleys: changing technology & Mediterranean warfare at sea in the 16th century. Conway Maritime Press. pp. 109–110 and 298–300. ISBN 0851779514.
Jump up ^ T.J. Rodman (1861), Reports of experiments on the properties of metals for cannon and the qualities of cannon powder, p. 270
^ Jump up to: a b Kelly 2004, p.195
Jump up ^ Tenney L. Davis (1943). The Chemistry of Powder and Explosives (PDF). p. 139.
^ Jump up to: a b Brown, G.I. (1998) The Big Bang: A history of Explosives Sutton Publishing pp.22&32 ISBN 0-7509-1878-0
^ Jump up to: a b c Kelly 2004, p.224
^ Jump up to: a b Rodney James (2011). The ABCs of Reloading: The Definitive Guide for Novice to Expert (9 ed.). Krause Publications. pp. 53–59. ISBN 978-1-4402-1396-0.
Jump up ^ Sharpe, Philip B. (1953) Complete Guide to Handloading Funk & Wagnalls p.137
Jump up ^ Wakeman, Randy. "Blackpowder to Pyrodex and Beyond". Retrieved 31 August 2014.
Jump up ^ "LESMOK POWDER".
Jump up ^ Julian S. Hatcher, Hatcher's Notebook, Stackpole Books, 1962. Chapter XIV, Gun Corrosion and Ammunition Developments, pages 346-349.
Jump up ^ Wakeman, Randy. "Blackpowder to Pyrodex and Beyond".
Jump up ^ Flash! Bang! Whiz!, University of Denver
Jump up ^ Parker, Harold T. (1983). Three Napoleonic battles. (Repr., Durham, 1944. ed.). Durham, NC: Duke Univ. Pr. p. 83. ISBN 0-8223-0547-X.
Jump up ^ Larrey is quoted in French at Dr Béraud, Études Hygiéniques de la chair de cheval comme aliment, Musée des Familles (1841-42).
Jump up ^ Rediker, Marcus (1989). Between the devil and the deep blue sea : merchant seamen, pirates, and the Anglo-American maritime world, 1700-1750 (1st pbk. ed.). Cambridge: Cambridge University Press. p. 12. ISBN 9780521379830.
Jump up ^ "Gunpowder Now Used To Drive Rivets And Splice Cables", April 1932, Popular Science
Jump up ^ "MasterBlaster System". Remington Products.
Jump up ^ Mining Journal 22 January 1853, p. 61
Benton, Captain James G. (1862). A Course of Instruction in Ordnance and Gunnery (2 ed.). West Point, New York: Thomas Publications. ISBN 1-57747-079-6.
Brown, G. I. (1998). The Big Bang: A History of Explosives. Sutton Publishing. ISBN 0-7509-1878-0.
Buchanan, Brenda J., ed. (2006). Gunpowder, Explosives and the State: A Technological History. Aldershot: Ashgate. ISBN 0-7546-5259-9.
Chase, Kenneth (2003). Firearms: A Global History to 1700. Cambridge University Press. ISBN 0-521-82274-2.
Cocroft, Wayne (2000). Dangerous Energy: The archaeology of gunpowder and military explosives manufacture. Swindon: English Heritage. ISBN 1-85074-718-0.
Crosby, Alfred W. (2002). Throwing Fire: Projectile Technology Through History. Cambridge University Press. ISBN 0-521-79158-8.
Earl, Brian (1978). Cornish Explosives. Cornwall: The Trevithick Society. ISBN 0-904040-13-5.
al-Hassan, Ahmad Y. "History of Science and Technology in Islam". |chapter= ignored (help).
Johnson, Norman Gardner. "explosive". Encyclopædia Britannica. Chicago: Encyclopædia Britannica Online.
Kelly, Jack (2004). Gunpowder: Alchemy, Bombards, & Pyrotechnics: The History of the Explosive that Changed the World. Basic Books. ISBN 0-465-03718-6.
Khan, Iqtidar Alam (1996). "Coming of Gunpowder to the Islamic World and North India: Spotlight on the Role of the Mongols". Journal of Asian History 30: 41–5.
Khan, Iqtidar Alam (2004). "Gunpowder and Firearms: Warfare in Medieval India". Oxford University Press. doi:10.1086/ahr.111.3.817.
Needham, Joseph (1986). "Science & Civilisation in China". V:7: The Gunpowder Epic. Cambridge University Press. ISBN 0-521-30358-3.
Norris, John (2003). Early Gunpowder Artillery: 1300-1600. Marlborough: The Crowood Press. ISBN 9781861266156.
Partington, J.R. (1960). A History of Greek Fire and Gunpowder. Cambridge, UK: W. Heffer & Sons.
Partington, James Riddick; Hall, Bert S. (1999). A History of Greek Fire and Gunpowder. Baltimore: Johns Hopkins University Press. doi:10.1353/tech.2000.0031. ISBN 0-8018-5954-9.
Urbanski, Tadeusz (1967). "Chemistry and Technology of Explosives" III. New York: Pergamon Press.
External links[edit]
Wikimedia Commons has media related to Gunpowder.
Look up gunpowder in Wiktionary, the free dictionary.
Gun and Gunpowder
The Origins of Gunpowder
Cannons and Gunpowder
Oare Gunpowder Works, Kent, UK
Royal Gunpowder Mills
The DuPont Company on the Brandywine A digital exhibit produced by the Hagley Library that covers the founding and early history of the DuPont Company powder yards in Delaware
"Ulrich Bretschler's Gunpowder Chemistry page".
Video Demonstration of the Medieval Siege Society's Guns, Including showing ignition of gunpowder
Black Powder Recipes
"Dr. Sasse's investigations (and others) found via search at US DTIC.MIL These contain scientific studies of BP properties and details of measurement techniques.".
Categories: GunpowderChinese inventionsExplosivesFirearm propellantsPyrotechnic compositionsRocket fuelsSolid fuels
Navigation menu
Create accountLog inArticleTalkReadEditView history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
Afrikaans
العربية
Aragonés
Asturianu
Azərbaycanca
Башҡортса
Беларуская
Беларуская (тарашкевіца)
Български
Bosanski
Brezhoneg
Буряад
Català
Чӑвашла
Čeština
Corsu
Cymraeg
Dansk
Deutsch
Eesti
Ελληνικά
Español
Esperanto
Euskara
فارسی
Français
Gaeilge
Galego
贛語
Хальмг
한국어
हिन्दी
Hrvatski
Ilokano
Bahasa Indonesia
Íslenska
Italiano
עברית
Kapampangan
Kiswahili
Kurdî
Latina
Latviešu
Lietuvių
Limburgs
Magyar
Македонски
മലയാളം
مصرى
Монгол
Nederlands
नेपाली
नेपाल भाषा
日本語
Нохчийн
Norsk bokmål
Norsk nynorsk
Occitan
Oʻzbekcha
پنجابی
Polski
Português
Română
Runa Simi
Русский
Саха тыла
Scots
Shqip
Sicilianu
Simple English
Slovenčina
Slovenščina
کوردی
Српски / srpski
Srpskohrvatski / српскохрватски
Suomi
Svenska
Tagalog
தமிழ்
Татарча/tatarça
ไทย
Türkçe
Українська
اردو
Tiếng Việt
Võro
Winaray
ייִדיש
粵語
Žemaitėška
中文
Edit links
This page was last modified on 28 November 2014 at 05:37.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policyAbout WikipediaDisclaimersContact WikipediaDevelopersMobile viewWikimedia Foundation Powered by MediaWiki
Smokeless powder
From Wikipedia, the free encyclopedia
Finnish smokeless powder
Smokeless powder is the name given to a number of propellants used in firearms and artillery that produce negligible smoke when fired, unlike the black powder they replaced. The term is unique to the United States and is generally not used in other English-speaking countries, which initially used proprietary names such as "Ballistite" and "Cordite" but gradually shifted to "propellant" as the generic term.
The basis of the term smokeless is that the combustion products are mainly gaseous, compared to around 55% solid products (mostly potassium carbonate, potassium sulfate, and potassium sulfide) for black powder.[1] Despite its name, smokeless powder is not completely smoke-free;[2] while there may be little noticeable smoke from small-arms ammunition, smoke from artillery fire can be substantial. This article focuses on nitrocellulose formulations, but the term smokeless powder was also used to describe various picrate mixtures with nitrate, chlorate, or dichromate oxidizers during the late 19th century, before the advantages of nitrocellulose became evident.[3]
Since the 14th century[4] gunpowder has not actually been a physical "powder," and smokeless powder can only be produced as a pelletized or extruded granular material. Smokeless powder allowed the development of modern semi- and fully automatic firearms and lighter breeches and barrels for artillery. Burnt black powder leaves a thick, heavy fouling that is hygroscopic and causes rusting of the barrel. The fouling left by smokeless powder exhibits none of these properties (though some primer compounds can leave hygroscopic salts that have a similar effect; non-corrosive primer compounds were introduced in the 1920s[5][6]). This makes an autoloading firearm with many moving parts feasible (which would otherwise jam or seize under heavy black powder fouling).
Smokeless powders are classified as, typically, division 1.3 explosives under the UN Recommendations on the transportation of Dangerous goods – Model Regulations, regional regulations (such as ADR) and national regulations (such as the United States' ATF). However, they are used as solid propellants; in normal use, they undergo deflagration rather than detonation.
Contents [hide]
1 Background
2 Nitroglycerine and guncotton
3 Propellant improvements
4 Chemical formulations
5 Instability and stabilization
6 Physical variations
7 Smokeless propellant components
8 Manufacturing
9 Flashless propellant
10 See also
11 References
11.1 Notes
11.2 Sources
12 External links
Background[edit]
Military commanders had been complaining since the Napoleonic Wars about the problems of giving orders on a battlefield obscured by the smoke of firing. Verbal commands could not be heard above the noise of the guns, and visual signals could not be seen through the thick smoke from the gunpowder used by the guns. Unless there was a strong wind, after a few shots, soldiers using black powder ammunition would have their view obscured by a huge cloud of smoke. Snipers or other concealed shooters were given away by a cloud of smoke over the firing position. Black powder is also corrosive, making cleaning mandatory after every use. Likewise, black powder's tendency to produce severe fouling caused actions to jam and often made reloading difficult.
Nitroglycerine and guncotton[edit]
Nitroglycerine was synthesized by the Italian chemist Ascanio Sobrero in 1847.[7] It was subsequently developed and manufactured by Alfred Nobel as an industrial explosive, but even then it was unsuitable as a propellant: despite its energetic and smokeless qualities, it detonates instead of deflagrating smoothly, making it more amenable to shattering a gun than propelling a projectile out of it. Nitroglycerine per se is also highly unstable, making it unfit to be carried in battlefield conditions.
A major step forward was the discovery of guncotton, a nitrocellulose-based material, by Swiss chemist Christian Friedrich Schönbein in 1846. He promoted its use as a blasting explosive[8] and sold manufacturing rights to the Austrian Empire. Guncotton was more powerful than gunpowder, but at the same time was once again somewhat more unstable. John Taylor obtained an English patent for guncotton; and John Hall & Sons began manufacture in Faversham.
English interest languished after an explosion destroyed the Faversham factory in 1847. Austrian Baron Wilhelm Lenk von Wolfsberg built two guncotton plants producing artillery propellant, but it too was dangerous under field conditions, and guns that could fire thousands of rounds using gunpowder would reach their service life after only a few hundred shots with the more powerful guncotton. Small arms could not withstand the pressures generated by guncotton at all.
After one of the Austrian factories blew up in 1862, Thomas Prentice & Company began manufacturing guncotton in Stowmarket in 1863; and British War Office chemist Sir Frederick Abel began thorough research at Waltham Abbey Royal Gunpowder Mills leading to a manufacturing process that eliminated the impurities in nitrocellulose making it safer to produce and a stable product safer to handle. Abel patented this process in 1865, when the second Austrian guncotton factory exploded. After the Stowmarket factory exploded in 1871, Waltham Abbey began production of guncotton for torpedo and mine warheads.[9]
Propellant improvements[edit]
In 1863, Prussian artillery captain Johann F. E. Schultze patented a small arms propellant of nitrated hardwood impregnated with saltpetre or barium nitrate. Prentice received an 1866 patent for a sporting powder of nitrated paper manufactured at Stowmarket, but ballistic uniformity suffered as the paper absorbed atmospheric moisture. In 1871, Frederick Volkmann received an Austrian patent for a colloided version of Schultze powder called Collodin, which he manufactured near Vienna for use in sporting firearms. Austrian patents were not published at the time, and the Austrian Empire considered the operation a violation of the government monopoly on explosives manufacture and closed the Volkmann factory in 1875.[9] In 1882, the Explosives Company at Stowmarket patented an improved formulation of nitrated cotton gelatinised by ether-alcohol with nitrates of potassium and barium. These propellants were suitable for shotguns but not rifles.[10]
Poudre B single-base smokeless powder flakes
In 1884, Paul Vieille invented a smokeless powder called Poudre B (short for poudre blanche—white powder, as distinguished from black powder)[11] made from 68.2% insoluble nitrocellulose, 29.8% soluble nitrocellulose gelatinized with ether and 2% paraffin. This was adopted for the Lebel rifle.[12] It was passed through rollers to form paper thin sheets, which were cut into flakes of the desired size.[11] The resulting propellant, today known as pyrocellulose, contains somewhat less nitrogen than guncotton and is less volatile. A particularly good feature of the propellant is that it will not detonate unless it is compressed, making it very safe to handle under normal conditions.
Vieille's powder revolutionized the effectiveness of small guns, because it gave off almost no smoke and was three times more powerful than black powder. Higher muzzle velocity meant a flatter trajectory and less wind drift and bullet drop, making 1000 meter shots practicable. Since less powder was needed to propel a bullet, the cartridge could be made smaller and lighter. This allowed troops to carry more ammunition for the same weight. Also, it would burn even when wet. Black powder ammunition had to be kept dry and was almost always stored and transported in watertight cartridges.
Other European countries swiftly followed and started using their own versions of Poudre B, the first being Germany and Austria, which introduced new weapons in 1888. Subsequently Poudre B was modified several times with various compounds being added and removed. Krupp began adding diphenylamine as a stabilizer in 1888.[9]
Meanwhile, in 1887, Alfred Nobel obtained an English patent for a smokeless gunpowder he called Ballistite. In this propellant the fibrous structure of cotton (nitro-cellulose) was destroyed by a nitro-glycerine solution instead of a solvent.[13] In England in 1889, a similar powder was patented by Hiram Maxim, and in the USA in 1890 by Hudson Maxim.[14] Ballistite was patented in the United States in 1891.
The Germans adopted ballistite for naval use in 1898, calling it WPC/98. The Italians adopted it as filite, in cord instead of flake form, but realising its drawbacks changed to a formulation with nitroglycerine they called solenite. In 1891 the Russians tasked the chemist Mendeleef with finding a suitable propellant; he created nitrocellulose gelatinised by ether-alcohol, which produced more nitrogen and more uniform colloidal structure than the French use of nitro-cottons in Poudre B. He called it pyro-collodion.[13]
Britain conducted trials on all the various types of propellant brought to their attention, but were dissatisfied with them all and sought something superior to all existing types. In 1889, Sir Frederick Abel, James Dewar and Dr W Kellner patented (Nos 5614 and 11,664 in the names of Abel and Dewar) a new formulation that was manufactured at the Royal Gunpowder Factory at Waltham Abbey. It entered British service in 1891 as Cordite Mark 1. Its main composition was 58% Nitro-glycerine, 37% Guncotton and 3% mineral jelly. A modified version, Cordite MD, entered service in 1901; this increased guncotton to 65% and reduced nitro-glycerine to 30%. This change reduced the combustion temperature and hence erosion and barrel wear. Cordite's advantages over gunpowder were reduced maximum pressure in the chamber (hence lighter breeches, etc.) but longer high pressure. Cordite could be made in any desired shape or size.[15] The creation of cordite led to a lengthy court battle between Nobel, Maxim, and another inventor over alleged British patent infringement.
The Anglo-American Explosives Company began manufacturing its shotgun powder in Oakland, New Jersey in 1890. DuPont began producing guncotton at Carneys Point Township, New Jersey in 1891.[3] Charles E. Munroe of the Naval Torpedo Station in Newport, Rhode Island patented a formulation of guncotton colloided with nitrobenzene, called Indurite, in 1891.[16] Several United States firms began producing smokeless powder when Winchester Repeating Arms Company started loading sporting cartridges with Explosives Company powder in 1893. California Powder Works began producing a mixture of nitroglycerine and nitrocellulose with ammonium picrate as Peyton Powder, Leonard Smokeless Powder Company began producing nitroglycerine-nitrocellulose Ruby powders, Laflin & Rand negotiated a license to produce Ballistite, and DuPont started producing smokeless shotgun powder. The United States Army evaluated 25 varieties of smokeless powder and selected Ruby and Peyton Powders as the most suitable for use in the Krag-Jørgensen service rifle. Ruby was preferred, because tin-plating was required to protect brass cartridge cases from picric acid in the Peyton Powder. Rather than paying the required royalties for Ballistite, Laflin & Rand financed Leonard's reorganization as the American Smokeless Powder Company. United States Army Lieutenant Whistler assisted American Smokeless Powder Company factory superintendent Aspinwall in formulating an improved powder named W.A. for their efforts. W.A. smokeless powder was the standard for United States military service rifles from 1897 until 1908.[3]
In 1897, United States Navy Lieutenant John Bernadou patented a nitrocellulose powder colloided with ether-alcohol.[16] The Navy licensed or sold patents for this formulation to DuPont and the California Powder Works while retaining manufacturing rights for the Naval Powder Factory, Indian Head, Maryland constructed in 1900. The United States Army adopted the Navy single-base formulation in 1908 and began manufacture at Picatinny Arsenal.[3] By that time Laflin & Rand had taken over the American Powder Company to protect their investment, and Laflin & Rand had been purchased by DuPont in 1902.[17] Upon securing a 99-year lease of the Explosives Company in 1903, DuPont enjoyed use of all significant smokeless powder patents in the United States, and was able to optimize production of smokeless powder.[3] When government anti-trust action forced divestiture in 1912, DuPont retained the nitrocellulose smokeless powder formulations used by the United States military and released the double-base formulations used in sporting ammunition to the reorganized Hercules Powder Company. These newer propellants were more stable and thus safer to handle than Poudre B, and also more powerful.
Chemical formulations[edit]
"Double base" redirects here. For the musical instrument, see double bass.
Currently, propellants using nitrocellulose (detonation velocity 7,300 m/s (23,950 ft/s)) (typically an ether-alcohol colloid of nitrocellulose) as the sole explosive propellant ingredient are described as single-base powder.[18]
Propellant mixtures containing nitrocellulose and nitroglycerin (detonation velocity 7,700 m/s (25,260 ft/s)) as explosive propellant ingredients are known as double-base powder.[19]
During the 1930s triple-base propellant containing nitrocellulose, nitroglycerin, and a substantial quantity of nitroguanidine (detonation velocity 8,200 m/s (26,900 ft/s)) as explosive propellant ingredients was developed. These propellant mixtures have reduced flash and flame temperature without sacrificing chamber pressure compared to single and double base propellants, albeit at the cost of more smoke.
In practice, triple base propellants are reserved mainly for large caliber ammunition such as used in (naval) artillery and tank guns. During World War II it had some use by British artillery. After that war it became the standard propellant in all British large caliber ammunition designs except small-arms. Most western nations, except the United States, followed a similar path.
In the late 20th century new propellant formulations started to appear. These are based on nitroguanidine and high explosives of the RDX (detonation velocity 8,750 m/s (28,710 ft/s)) type.
Instability and stabilization[edit]
Nitrocellulose deteriorates with time, yielding acidic byproducts. Those byproducts catalyze the further deterioration, increasing its rate. The released heat, in case of bulk storage of the powder, or too large blocks of solid propellant, can cause self-ignition of the material. Single-base nitrocellulose propellants are hygroscopic and most susceptible to degradation; double-base and triple-base propellants tend to deteriorate more slowly. To neutralize the decomposition products, which could otherwise cause corrosion of metals of the cartridges and gun barrels, calcium carbonate is added to some formulations.
To prevent buildup of the deterioration products, stabilizers are added. Diphenylamine is one of the most common stabilizers used. Nitrated analogs of diphenylamine formed in the process of stabilizing decomposing powder are sometimes used as stabilizers themselves.[20][21] The stabilizers are added in the amount of 0.5–2% of the total amount of the formulation; higher amounts tend to degrade its ballistic properties. The amount of the stabilizer is depleted with time. Propellants in storage should be periodically tested for the amount of stabilizer remaining, as its depletion may lead to auto-ignition of the propellant.
Physical variations[edit]
Ammunition handloading powders
Smokeless powder may be corned into small spherical balls or extruded into cylinders or strips with many cross-sectional shapes (strips with various rectangular proportions, single or multi-hole cylinders, slotted cylinders) using solvents such as ether. These extrusions can be cut into short ('flakes') or long pieces ('cords' many inches long). Cannon powder has the largest pieces.
The properties of the propellant are greatly influenced by the size and shape of its pieces. The specific surface area of the propellant influences the speed of burning, and the size and shape of the particles determine the specific surface area. By manipulation of the shape it is possible to influence the burning rate and hence the rate at which pressure builds during combustion. Smokeless powder burns only on the surfaces of the pieces. Larger pieces burn more slowly, and the burn rate is further controlled by flame-deterrent coatings that retard burning slightly. The intent is to regulate the burn rate so that a more or less constant pressure is exerted on the propelled projectile as long as it is in the barrel so as to obtain the highest velocity. The perforations stabilize the burn rate because as the outside burns inward (thus shrinking the burning surface area) the inside is burning outward (thus increasing the burning surface area, but faster, so as to fill up the increasing volume of barrel presented by the departing projectile).[22] Fast-burning pistol powders are made by extruding shapes with more area such as flakes or by flattening the spherical granules. Drying is usually performed under a vacuum. The solvents are condensed and recycled. The granules are also coated with graphite to prevent static electricity sparks from causing undesired ignitions.[23]
Faster-burning propellants generate higher temperatures and higher pressures; however, they also increase wear on gun barrels.
Smokeless propellant components[edit]
The propellant formulations may contain various energetic and auxiliary components:
Propellants:
Nitrocellulose, an energetic component of most smokeless propellants[24]
Nitroglycerin, an energetic component of double-base and triple-base formulations[24]
Nitroguanidine, a component of triple-base formulations[24]
D1NA (bis-nitroxyethylnitramine)[25]
Fivonite (tetramethylolcyclopentanone)[25]
DGN (di-ethylene glycol dinitrate)[26]
Acetyl cellulose[27]
Deterrents, (or moderants), to slow the burning rate
Centralites (symmetrical diphenyl urea—primarily diethyl or dimethyl)[28][29]
Dibutyl phthalate[24][29]
Dinitrotoluene (toxic, carcinogenic, and obsolete)[24][30]
Akardite (asymmetrical diphenyl urea)[26]
ortho-tolyl urethane[31]
Polyester adipate
Camphor (obsolete)[29]
Stabilizers, to prevent or slow down self-decomposition[32]
Diphenylamine[33]
Petroleum jelly[34]
Calcium carbonate[24]
Magnesium oxide[26]
Sodium bicarbonate[27]
beta-naphthol methyl ether[31]
Amyl alcohol (obsolete)[35]
Aniline (obsolete)[36]
Decoppering additives, to hinder the buildup of copper residues from the gun barrel rifling
Tin metal and compounds (e.g., tin dioxide)[24][37]
Bismuth metal and compounds (e.g., bismuth trioxide, bismuth subcarbonate, bismuth nitrate, bismuth antimonide); the bismuth compounds are favored as copper dissolves in molten bismuth, forming brittle and easily removable alloy
Lead foil and lead compounds, phased out due to toxicity[25]
Flash reducers, to reduce the brightness of the muzzle flash (all have a disadvantage: the production of smoke)[38]
Potassium chloride[39]
Potassium nitrate
Potassium sulfate[24][37]
Potassium hydrogen tartrate (a byproduct of wine production formerly used by French artillery)[39]
Wear reduction additives, to lower the wear of the gun barrel liners[40]
Wax
Talc
Titanium dioxide
Polyurethane jackets over the powder bags, in large guns
Other additives
Ethyl acetate, a solvent for manufacture of spherical powder[34]
Rosin, a surfactant to hold the grain shape of spherical powder
Graphite, a lubricant to cover the grains and prevent them from sticking together, and to dissipate static electricity[23]
Manufacturing[edit]
This section describes procedures used in the United States. See Cordite for alternative procedures formerly used in the United Kingdom.
The United States Navy manufactured single-base tubular powder for naval artillery at Indian Head, Maryland, beginning in 1900. Similar procedures were used for United States Army production at Picatinny Arsenal beginning in 1907[18] and for manufacture of smaller grained Improved Military Rifle (IMR) powders after 1914. Short-fiber cotton linter was boiled in a solution of sodium hydroxide to remove vegetable waxes, and then dried before conversion to nitrocellulose by mixing with concentrated nitric and sulfuric acids. Nitrocellulose still resembles fibrous cotton at this point in the manufacturing process, and was typically identified as pyrocellulose because it would spontaneously ignite in air until unreacted acid was removed. The term guncotton was also used; although some references identify guncotton as a more extensively nitrated and refined product used in torpedo and mine warheads prior to use of TNT.[41]
Unreacted acid was removed from pyrocellulose pulp by a multistage draining and water washing process similar to that used in paper mills during production of chemical woodpulp. Pressurized alcohol removed remaining water from drained pyrocellulose prior to mixing with ether and diphenylamine. The mixture was then fed through a press extruding a long tubular cord form to be cut into grains of the desired length.[42]
Alcohol and ether were then evaporated from "green" powder grains to a remaining solvent concentration between 3 percent for rifle powders and 7 percent for large artillery powder grains. Burning rate is inversely proportional to solvent concentration. Grains were coated with electrically conductive graphite to minimize generation of static electricity during subsequent blending. "Lots" containing more than ten tonnes of powder grains were mixed through a tower arrangement of blending hoppers to minimize ballistic differences. Each blended lot was then subjected to testing to determine the correct loading charge for the desired performance.[43][44]
Military quantities of old smokeless powder were sometimes reworked into new lots of propellants.[45] Through the 1920s Dr. Fred Olsen worked at Picatinny Arsenal experimenting with ways to salvage tons of single-base cannon powder manufactured for World War I. Dr. Olsen was employed by Western Cartridge Company in 1929 and developed a process for manufacturing spherical smokeless powder by 1933.[46] Reworked powder or washed pyrocellulose can be dissolved in ethyl acetate containing small quantities of desired stabilizers and other additives. The resultant syrup, combined with water and surfactants, can be heated and agitated in a pressurized container until the syrup forms an emulsion of small spherical globules of the desired size. Ethyl acetate distills off as pressure is slowly reduced to leave small spheres of nitrocellulose and additives. The spheres can be subsequently modified by adding nitroglycerine to increase energy, flattening between rollers to a uniform minimum dimension, coating with phthalate deterrents to retard ignition, and/or glazing with graphite to improve flow characteristics during blending.[47][48]
Modern smokeless powder is produced in the United States by St. Marks Powder, Inc. owned by General Dynamics.[49]
Flashless propellant[edit]
Muzzle flash is the light emitted in the vicinity of the muzzle by the hot propellant gases and the chemical reactions that follow as the gases mix with the surrounding air. Before projectiles exit a slight pre-flash may occur from gases leaking past the projectiles. Following muzzle exit the heat of gases is usually sufficient to emit visible radiation – the primary flash. The gases expand but as they pass through the Mach disc they are re-compressed to produce an intermediate flash. Hot combustible gases (e.g. hydrogen and carbon-monoxide) may follow when they mix with oxygen in the surrounding air to produce the secondary flash, the brightest. The secondary flash does not usually occur with small-arms.[50]
Nitrocellulose contains insufficient oxygen to completely oxidize its carbon and hydrogen. The oxygen deficit is increased by addition of graphite and organic stabilizers. Products of combustion within the gun barrel include flammable gasses like hydrogen and carbon monoxide. At high temperature, these flammable gasses will ignite when turbulently mixed with atmospheric oxygen beyond the muzzle of the gun. During night engagements the flash produced by ignition can reveal the location of the gun to enemy forces[51] and cause temporary night-blindness among the gun crew by photo-bleaching visual purple.[52]
Flash suppressors are commonly used on small arms to reduce the flash signature, but this approach is not practical for artillery. Artillery muzzle flash up to 150 feet (46 m) from the muzzle has been observed, and can be reflected off clouds and be visible for distances up to 30 miles (48 km).[51] For artillery the most effective method is a propellant that produces a large proportion of inert nitrogen at relatively low temperatures that dilutes the combustible gases. Triple based propellants are used for this because of the nitrogen in the nitroguanidine.[53]
Before the use of triple based propellants the usual method of flash reduction was to add inorganic salts like potassium chloride so their specific heat capacity might reduce the temperature of combustion gasses and their finely divided particulate smoke might block visible wavelengths of radiant energy of combustion.[39]
See also[edit]
Portal icon Pyrotechnics portal
Antique guns
Ballistite
Cordite
Firearms
Gunpowder
Nitrocellulose
Small arms
Brown-brown – a drug created by mixing cocaine with cartridge powder
References[edit]
Notes[edit]
Jump up ^ Hatcher, Julian S. and Barr, Al Handloading Hennage Lithograph Company (1951) p.34
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) p.44
^ Jump up to: a b c d e Sharpe, Philip B. Complete Guide to Handloading 3rd Edition (1953) Funk & Wagnalls pp.146-149
Jump up ^ see gunpowder
Jump up ^ Sharpe, Philip B. Complete Guide To Handloading (1953) Funk & Wagnalls p.60
Jump up ^ Davis, William C., Jr. Handloading (1981) National Rifle Association p.21
Jump up ^ Davis, Tenney L. The Chemistry of Powder & Explosives (1943) page 195
Jump up ^ Davis, William C., Jr. Handloading National Rifle Association of America (1981) p.28
^ Jump up to: a b c Sharpe, Philip B. Complete Guide to Handloading 3rd Edition (1953) Funk & Wagnalls pp.141-144
Jump up ^ Hogg, Oliver F. G. Artillery: Its Origin, Heyday and Decline (1969) p.138-139
^ Jump up to: a b Davis, Tenney L. The Chemistry of Powder & Explosives (1943) pages 289–292
Jump up ^ Hogg, Oliver F. G. Artillery: Its Origin, Heyday and Decline (1969) p.139
^ Jump up to: a b Hogg, Oliver F. G. Artillery: Its Origin, Heyday and Decline (1969) p.140
Jump up ^ U.S. Patent 430,212 – Manufacture of explosive – H. S. Maxim
Jump up ^ Hogg, Oliver F. G. Artillery: Its Origin, Heyday and Decline (1969) p.141
^ Jump up to: a b Davis, Tenney L. The Chemistry of Powder & Explosives (1943) pages 296-297
Jump up ^ "Laflin & Rand Powder Company". DuPont. Retrieved 2012-02-24.
^ Jump up to: a b Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p.297
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p.298
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) p.28
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p. 310
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) pp.41–43
^ Jump up to: a b Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p.306
^ Jump up to: a b c d e f g h Campbell, John Naval Weapons of World War Two (1985) p. 5
^ Jump up to: a b c Campbell, John Naval Weapons of World War Two (1985) p. 104
^ Jump up to: a b c Campbell, John Naval Weapons of World War Two (1985) p. 221
^ Jump up to: a b Campbell, John Naval Weapons of World War Two (1985) p. 318
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 317–320
^ Jump up to: a b c Davis, William C., Jr. Handloading National Rifle Association of America (1981) p.30
Jump up ^ Davis, William C., Jr. Handloading National Rifle Association of America (1981) p.31
^ Jump up to: a b Campbell, John Naval Weapons of World War Two (1985) p. 174
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 307–311
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p. 302
^ Jump up to: a b Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p. 296
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p. 307
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) p. 308
^ Jump up to: a b Davis, William C., Jr. Handloading National Rifle Association of America (1981) p.32
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 322–327
^ Jump up to: a b c Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 323–327
Jump up ^ "USA 16"/50 (40.6 cm) Mark 7". NavWeaps. 2008-11-03. Retrieved 2008-12-05.
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) pages 28–31
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) pages 31–35
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) pages 35–41
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 293 & 306
Jump up ^ Fairfield, A. P., CDR USN Naval Ordnance Lord Baltimore Press (1921) p.39
Jump up ^ Matunas, E. A. Winchester-Western Ball Powder Loading Data Olin Corporation (1978) p.3
Jump up ^ Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 328–330
Jump up ^ Wolfe, Dave Propellant Profiles Volume 1 Wolfe Publishing Company (1982) pages 136–137
Jump up ^ General Dynamics Commercial Powder Applications.
Jump up ^ Moss G. M., Leeming D. W., Farrar C. L. Military Ballistics (1969) pages 55–56
^ Jump up to: a b Davis, Tenny L. The Chemistry of Powder & Explosives (1943) pages 322–323
Jump up ^ Milner p.68
Jump up ^ Moss G. M., Leeming D. W., Farrar C. L. Military Ballistics (1969) pages 59–60
Sources[edit]
Campbell, John (1985). Naval Weapons of World War Two. Naval Institute Press. ISBN 0-87021-459-4.
Davis, Tenney L. (1943). The Chemistry of Powder & Explosives (Angriff Press [1992] ed.). John Wiley & Sons Inc. ISBN 0-913022-00-4.
Davis, William C., Jr. (1981). Handloading. National Rifle Association of America. ISBN 0-935998-34-9.
Fairfield, A. P., CDR USN (1921). Naval Ordnance. Lord Baltimore Press.
Hatcher, Julian S. and Barr, Al (1951). Handloading. Hennage Lithograph Company.
Matunas, E. A. (1978). Winchester-Western Ball Powder Loading Data. Olin Corporation.
Milner, Marc (1985). North Atlantic Run. Naval Institute Press. ISBN 0-87021-450-0.
Wolfe, Dave (1982). Propellant Profiles Volume 1. Wolfe Publishing Company. ISBN 0-935632-10-7.
External links[edit]
The Manufacture of Smokeless Powders and their Forensic Analysis: A Brief Review – Robert M. Heramb, Bruce R. McCord
Hudson Maxim papers (1851-1925) at Hagley Museum and Library. Collection includes material relating to Maxim's patent on the process of making smokeless powder.
Categories: CorditeExplosivesFirearm propellantsSolid fuels
Navigation menu
Create accountLog inArticleTalkReadEditView history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
العربية
Български
Dansk
Deutsch
Español
فارسی
Français
Bahasa Indonesia
Íslenska
Italiano
עברית
Nederlands
日本語
Polski
Português
Русский
Svenska
தமிழ்
中文
Edit links
This page was last modified on 25 July 2014 at 22:33.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policyAbout WikipediaDisclaimersContact WikipediaDevelopersMobile viewWikimedia Foundation Powered by MediaWiki
Deflagration
From Wikipedia, the free encyclopedia
[hide]This article has multiple issues. Please help improve it or discuss these issues on the talk page.
This article needs additional citations for verification. (April 2011)
This article may be too technical for most readers to understand. (December 2013)
A log in a fireplace.
Deflagration [1] (Lat: de + flagrare, "to burn down") is a term describing subsonic combustion propagating through heat transfer; hot burning material heats the next layer of cold material and ignites it. Most "fire" found in daily life, from flames to explosions, is deflagration. Deflagration is different from detonation, which is supersonic and propagates through shock.
Contents [hide]
1 Applications
2 Oil/wax fire and water
3 Flame physics
4 Damaging deflagration events
5 See also
6 References
Applications[edit]
In engineering applications, deflagrations are easier to control than detonations. Consequently, they are better suited when the goal is to move an object (a bullet in a gun, or a piston in an internal combustion engine) with the force of the expanding gas. Typical examples of deflagrations are the combustion of a gas-air mixture in a gas stove or a fuel-air mixture in an internal combustion engine, and the rapid burning of gunpowder in a firearm or of pyrotechnic mixtures in fireworks. Deflagration systems and products can also be used in mining, demolition and stone quarrying via gas pressure blasting as a beneficial alternative to high explosives.
Oil/wax fire and water[edit]
Adding water to a burning hydrocarbon such as oil or wax produces a deflagration. The water boils rapidly and ejects the burning material as a fine spray of droplets. A deflagration then occurs as the fine mist of oil ignites and burns extremely rapidly. These are particularly common in chip pan fires, which are responsible for one in five household fires in Britain.[2]
Flame physics[edit]
The underlying flame physics can be understood with the help of an idealized model consisting of a uniform one-dimensional tube of unburnt and burned gaseous fuel, separated by a thin transitional region of width \delta\; in which the burning occurs. The burning region is commonly referred to as the flame or flame front. In equilibrium, thermal diffusion across the flame front is balanced by the heat supplied by burning.
There are two characteristic timescales which are important here. The first is the thermal diffusion timescale \tau_d\;, which is approximately equal to
\tau_d \simeq \delta^2 / \kappa,
where \kappa \; is the thermal diffusivity. The second is the burning timescale \tau_b that strongly decreases with temperature, typically as
\tau_b\propto \exp[\Delta U/(k_B T_f)],
where \Delta U\; is the activation barrier for the burning reaction and T_f\; is the temperature developed as the result of burning; the value of this so-called "flame temperature" can be determined from the laws of thermodynamics.
For a stationary moving deflagration front, these two timescales must be equal: the heat generated by burning is equal to the heat carried away by heat transfer. This makes it possible to calculate the characteristic width \delta\; of the flame front:
\tau_b = \tau_d\;,
thus
\delta \simeq \sqrt {\kappa \tau_b} .
Now, the thermal flame front propagates at a characteristic speed S_l\;, which is simply equal to the flame width divided by the burn time:
S_l \simeq \delta / \tau_b \simeq \sqrt {\kappa / \tau_b} .
This simplified model neglects the change of temperature and thus the burning rate across the deflagration front. This model also neglects the possible influence of turbulence. As a result, this derivation gives only the laminar flame speed -- hence the designation S_l\;.
Damaging deflagration events[edit]
Damage to buildings, equipment and people can result from a large-scale, short-duration deflagration. The potential damage is primarily a function of the total amount of fuel burned in the event (total energy available), the maximum flame velocity that is achieved, and the manner in which the expansion of the combustion gases is contained.
In free-air deflagrations, there is a continuous variation in deflagration effects relative to the maximum flame velocity. When flame velocities are low, the effect of a deflagration is to release heat. Some authors use the term flash fire to describe these low-speed deflagrations. At flame velocities near the speed of sound, the energy released is in the form of pressure and the results resemble a detonation. Between these extremes both heat and pressure are released.
When a low-speed deflagration occurs within a closed vessel or structure, pressure effects can produce damage due to expansion of gases as a secondary effect. The heat released by the deflagration causes the combustion gases and excess air to expand thermally. The net result is that the volume of the vessel or structure must expand to accommodate the hot combustion gases, or the vessel must be strong enough to withstand the additional internal pressure, or it fails, allowing the gases to escape. The risk of deflagration inside waste storage drums is a growing concern in storage facilities.
See also[edit]
Look up deflagration in Wiktionary, the free dictionary.
Pressure piling
References[edit]
Jump up ^ "Glossary D-H". Hutchisonrodway.co.nz. Retrieved 2013-12-29.
Jump up ^ UK Fire Service advice on chip pan fires
Categories: Explosives
Navigation menu
Create accountLog inArticleTalkReadEditView history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
Català
Čeština
Deutsch
Español
Français
Italiano
Lietuvių
Nederlands
Norsk bokmål
Polski
Português
Русский
Српски / srpski
Svenska
Edit links
This page was last modified on 2 October 2014 at 16:44.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policyAbout WikipediaDisclaimersContact WikipediaDevelopersMobile viewWikimedia Foundation Powered by MediaWiki
United Kingdom
From Wikipedia, the free encyclopedia
This article is about the sovereign state. For the island, see Great Britain. For other uses, see United Kingdom (disambiguation) and UK (disambiguation).
Page semi-protected
United Kingdom of Great
Britain and Northern Ireland[show]
A flag featuring both cross and saltire in red, white and blue Coat of arms containing shield and crown in centre, flanked by lion and unicorn
Flag Royal coat of arms[nb 1]
Anthem: "God Save the Queen"[nb 2]
MENU0:00
Two islands to the north-west of continental Europe. Highlighted are the larger island and the north-eastern fifth of the smaller island to the west.
Location of the United Kingdom (dark green)
– in Europe (green & dark grey)
– in the European Union (green)
Capital
and largest city London
51°30′N 0°7′W
Official language
and national language English
Recognised regional
languages Cornish, Irish, Scots, Scottish Gaelic, Ulster-Scots, Welsh[nb 3]
Ethnic groups (2011) 87.1% White
7.0% Asian
3.0% Black
2.0% Mixed
0.9% Other
Demonym British, Briton
Government Unitary parliamentary constitutional monarchy
- Monarch Elizabeth II
- Prime Minister David Cameron
Legislature Parliament
- Upper house House of Lords
- Lower house House of Commons
Formation
- Acts of Union 1707 1 May 1707
- Acts of Union 1800 1 January 1801
- Irish Free State Constitution Act 5 December 1922
Area
- Total 243,610 km2 (80th)
94,060 sq mi
- Water (%) 1.34
Population
- 2013 estimate 64,100,000[3] (22nd)
- 2011 census 63,181,775[4] (22nd)
- Density 255.6/km2 (51st)
661.9/sq mi
GDP (PPP) 2014 estimate
- Total $2.435 trillion[5] (10th)
- Per capita $37,744[5] (27th)
GDP (nominal) 2014 estimate
- Total $2.848 trillion[5] (6th)
- Per capita $44,141[5] (22nd)
Gini (2012) positive decrease 32.8[6]
medium · 33rd
HDI (2013) Steady 0.892[7]
very high · 14th
Currency Pound sterling (GBP)
Time zone GMT (UTC)
- Summer (DST) BST (UTC+1)
Date format dd/mm/yyyy (AD)
Drives on the left
Calling code +44
ISO 3166 code GB
Internet TLD .uk
The United Kingdom of Great Britain and Northern Ireland Listeni/ɡreɪt ˈbrɪt(ə)n ənd ˈnɔːð(ə)n ˈʌɪələnd/, commonly known as the United Kingdom (UK) or Britain, is a sovereign state in Europe. Lying off the north-western coast of the European mainland, the country includes the island of Great Britain (a term also applied loosely to refer to the whole country),[8] the north-eastern part of the island of Ireland, and many smaller islands. Northern Ireland is the only part of the UK that shares a land border with another state: the Republic of Ireland.[nb 4] Apart from this land border, the UK is surrounded by the Atlantic Ocean, with the North Sea in the east and the English Channel in the south. The Irish Sea lies between Great Britain and Ireland. The UK has an area of 243,610 square kilometres (94,060 sq mi), making it the 78th-largest sovereign state in the world and the 11th-largest in Europe.
The United Kingdom is the 22nd-most populous country, with an estimated 64.1 million inhabitants.[3] It is a constitutional monarchy with a parliamentary system of governance.[9][10] Its capital city is London, an important global city and financial centre with the fourth-largest urban area in Europe.[11] The current monarch—since 6 February 1952—is Queen Elizabeth II. The UK consists of four countries: England, Scotland, Wales, and Northern Ireland.[12] The latter three have devolved administrations,[13] each with varying powers,[14][15] based in their capitals, Edinburgh, Cardiff, and Belfast, respectively. Guernsey, Jersey, and the Isle of Man are not part of the United Kingdom, being Crown dependencies with the British Government responsible for defence and international representation.[16] The UK has fourteen Overseas Territories,[17] including the disputed Falkland Islands, Gibraltar, and Indian Ocean Territory.
The relationships among the countries of the United Kingdom have changed over time. Wales was annexed by the Kingdom of England under the Acts of Union of 1536 and 1543. A treaty between England and Scotland resulted in a unified Kingdom of Great Britain in 1707, which in 1801, merged with the Kingdom of Ireland to form the United Kingdom of Great Britain and Ireland. In 1922, five-sixths of Ireland seceded from the country, leaving the present formulation of the United Kingdom of Great Britain and Northern Ireland.[nb 5] British Overseas Territories, formerly colonies, are the remnants of the British Empire which, at its height in the late 19th and early 20th centuries, encompassed almost a quarter of the world's land mass and was the largest empire in history. British influence can be observed in the language, culture, and legal systems of many of its former colonies.
The United Kingdom is a developed country and has the world's sixth-largest economy by nominal GDP and tenth-largest by purchasing power parity. The country is considered to have a high-income economy and is categorised as very high in the Human Development Index, currently ranking 14th in the world. It was the world's first industrialised country and the world's foremost power during the 19th and early 20th centuries.[18][19] The UK remains a great power with considerable economic, cultural, military, scientific, and political influence internationally.[20][21] It is a recognised nuclear weapons state and its military expenditure ranks fifth or sixth in the world.[22][23] The UK has been a permanent member of the United Nations Security Council since its first session in 1946. It has been a member state of the European Union (EU) and its predecessor, the European Economic Community (EEC), since 1973; it is also a member of the Commonwealth of Nations, the Council of Europe, the G7, the G8, the G20, NATO, the Organisation for Economic Co-operation and Development (OECD), and the World Trade Organization (WTO).
Contents [hide]
1 Etymology and terminology
2 History
2.1 Before 1707
2.2 Since the Acts of Union of 1707
3 Geography
3.1 Climate
3.2 Administrative divisions
4 Dependencies
5 Politics
5.1 Government
5.2 Devolved administrations
5.3 Law and criminal justice
5.4 Foreign relations
5.5 Military
6 Economy
6.1 Science and technology
6.2 Transport
6.3 Energy
7 Demographics
7.1 Ethnic groups
7.2 Languages
7.3 Religion
7.4 Migration
7.5 Education
7.6 Healthcare
8 Culture
8.1 Literature
8.2 Music
8.3 Visual art
8.4 Cinema
8.5 Media
8.6 Philosophy
8.7 Sport
8.8 Symbols
9 See also
10 Notes
11 References
12 Further reading
13 External links
Etymology and terminology
See also: Britain (placename) and Terminology of the British Isles
The 1707 Acts of Union declared that the kingdoms of England and Scotland were "United into One Kingdom by the Name of Great Britain", though the new state is also referred to in the Acts as the "Kingdom of Great Britain", "United Kingdom of Great Britain" and "United Kingdom".[24][25][nb 6] However, the term "united kingdom" is only found in informal use during the 18th century and the country was only occasionally referred to as the "United Kingdom of Great Britain".[26] The Acts of Union 1800 united the Kingdom of Great Britain and the Kingdom of Ireland in 1801, forming the United Kingdom of Great Britain and Ireland. The name "United Kingdom of Great Britain and Northern Ireland" was adopted following the independence of the Irish Free State, and the partition of Ireland, in 1922, which left Northern Ireland as the only part of the island of Ireland within the UK.[27]
Although the United Kingdom, as a sovereign state, is a country, England, Scotland, Wales, and to a lesser degree, Northern Ireland, are also regarded as countries, though they are not sovereign states.[28][29] Scotland, Wales and Northern Ireland have devolved self-government.[30][31] The British Prime Minister's website has used the phrase "countries within a country" to describe the United Kingdom.[12] Some statistical summaries, such as those for the twelve NUTS 1 regions of the UK, also refer to Scotland, Wales and Northern Ireland as "regions".[32][33] Northern Ireland is also referred to as a "province".[28][34] With regard to Northern Ireland, the descriptive name used "can be controversial, with the choice often revealing one's political preferences."[35]
The term Britain is often used as a synonym for the United Kingdom. The term Great Britain, by contrast, refers conventionally to the island of Great Britain, or politically to England, Scotland and Wales in combination.[36][37][38] However, it is sometimes used as a loose synonym for the United Kingdom as a whole.[39][40] GB and GBR are the standard country codes for the United Kingdom (see ISO 3166-2 and ISO 3166-1 alpha-3) and are consequently used by international organisations to refer to the United Kingdom. Additionally, the United Kingdom's Olympic team competes under the name "Great Britain" or "Team GB".[41][42]
The adjective British is commonly used to refer to matters relating to the United Kingdom. The term has no definite legal connotation, but is used in law to refer to UK citizenship and matters to do with nationality.[43] People of the United Kingdom use a number of different terms to describe their national identity and may identify themselves as being British; or as being English, Scottish, Welsh, Northern Irish, or Irish;[44] or as being both.[45]
In 2006, a new design of British passport was introduced. Its first page shows the long form name of the state in English, Welsh and Scottish Gaelic.[46] In Welsh, the long form name of the state is "Teyrnas Unedig Prydain Fawr a Gogledd Iwerddon" with "Teyrnas Unedig" being used as a short form name on government websites.[47] In Scottish Gaelic, the long form is "Rìoghachd Aonaichte Bhreatainn is Èireann a Tuath" and the short form "Rìoghachd Aonaichte".
History
See also: History of the British Isles
Before 1707
Stonehenge, in Wiltshire, was erected around 2500 BC.
Main articles: History of England, History of Wales, History of Scotland, History of Ireland and History of the formation of the United Kingdom
Settlement by anatomically modern humans of what was to become the United Kingdom occurred in waves beginning by about 30,000 years ago.[48] By the end of the region's prehistoric period, the population is thought to have belonged, in the main, to a culture termed Insular Celtic, comprising Brythonic Britain and Gaelic Ireland.[49] The Roman conquest, beginning in 43 AD, and the 400-year rule of southern Britain, was followed by an invasion by Germanic Anglo-Saxon settlers, reducing the Brythonic area mainly to what was to become Wales and the historic Kingdom of Strathclyde.[50] Most of the region settled by the Anglo-Saxons became unified as the Kingdom of England in the 10th century.[51] Meanwhile, Gaelic-speakers in north west Britain (with connections to the north-east of Ireland and traditionally supposed to have migrated from there in the 5th century)[52][53] united with the Picts to create the Kingdom of Scotland in the 9th century.[54]
In 1066, the Normans invaded England from France and after its conquest, seized large parts of Wales, conquered much of Ireland and were invited to settle in Scotland, bringing to each country feudalism on the Northern French model and Norman-French culture.[55] The Norman elites greatly influenced, but eventually assimilated with, each of the local cultures.[56] Subsequent medieval English kings completed the conquest of Wales and made an unsuccessful attempt to annex Scotland. Thereafter, Scotland maintained its independence, albeit in near-constant conflict with England. The English monarchs, through inheritance of substantial territories in France and claims to the French crown, were also heavily involved in conflicts in France, most notably the Hundred Years War, while the Kings of Scots were in an alliance with the French during this period.[57]
The Bayeux Tapestry depicts the Battle of Hastings and the events leading to it.
The early modern period saw religious conflict resulting from the Reformation and the introduction of Protestant state churches in each country.[58] Wales was fully incorporated into the Kingdom of England,[59] and Ireland was constituted as a kingdom in personal union with the English crown.[60] In what was to become Northern Ireland, the lands of the independent Catholic Gaelic nobility were confiscated and given to Protestant settlers from England and Scotland.[61]
In 1603, the kingdoms of England, Scotland and Ireland were united in a personal union when James VI, King of Scots, inherited the crowns of England and Ireland and moved his court from Edinburgh to London; each country nevertheless remained a separate political entity and retained its separate political, legal, and religious institutions.[62][63]
In the mid-17th century, all three kingdoms were involved in a series of connected wars (including the English Civil War) which led to the temporary overthrow of the monarchy and the establishment of the short-lived unitary republic of the Commonwealth of England, Scotland and Ireland.[64][65]
Although the monarchy was restored, it ensured (with the Glorious Revolution of 1688) that, unlike much of the rest of Europe, royal absolutism would not prevail, and a professed Catholic could never accede to the throne. The British constitution would develop on the basis of constitutional monarchy and the parliamentary system.[66] During this period, particularly in England, the development of naval power (and the interest in voyages of discovery) led to the acquisition and settlement of overseas colonies, particularly in North America.[67][68]
Since the Acts of Union of 1707
Main article: History of the United Kingdom
The Treaty of Union led to a single united kingdom encompassing all Great Britain.
On 1 May 1707, the united kingdom of Great Britain came into being, the result of Acts of Union being passed by the parliaments of England and Scotland to ratify the 1706 Treaty of Union and so unite the two kingdoms.[69][70][71]
In the 18th century, cabinet government developed under Robert Walpole, in practice the first prime minister (1721–1742). A series of Jacobite Uprisings sought to remove the Protestant House of Hanover from the British throne and restore the Catholic House of Stuart. The Jacobites were finally defeated at the Battle of Culloden in 1746, after which the Scottish Highlanders were brutally suppressed. The British colonies in North America that broke away from Britain in the American War of Independence became the United States of America in 1782. British imperial ambition turned elsewhere, particularly to India.[72]
During the 18th century, Britain was involved in the Atlantic slave trade. British ships transported an estimated 2 million slaves from Africa to the West Indies before banning the trade in 1807.[73] The term 'United Kingdom' became official in 1801 when the parliaments of Britain and Ireland each passed an Act of Union, uniting the two kingdoms and creating the United Kingdom of Great Britain and Ireland.[74]
In the early 19th century, the British-led Industrial Revolution began to transform the country. It slowly led to a shift in political power away from the old Tory and Whig landowning classes towards the new industrialists. An alliance of merchants and industrialists with the Whigs would lead to a new party, the Liberals, with an ideology of free trade and laissez-faire. In 1832 Parliament passed the Great Reform Act, which began the transfer of political power from the aristocracy to the middle classes. In the countryside, enclosure of the land was driving small farmers out. Towns and cities began to swell with a new urban working class. Few ordinary workers had the vote, and they created their own organisations in the form of trade unions.
Painting of a bloody battle. Horses and infantry fight or lie on grass.
The Battle of Waterloo marked the end of the Napoleonic Wars and the start of Pax Britannica.
After the defeat of France in the Revolutionary and Napoleonic Wars (1792–1815), the UK emerged as the principal naval and imperial power of the 19th century (with London the largest city in the world from about 1830).[75] Unchallenged at sea, British dominance was later described as Pax Britannica.[76][77] By the time of the Great Exhibition of 1851, Britain was described as the "workshop of the world".[78] The British Empire was expanded to include India, large parts of Africa and many other territories throughout the world. Alongside the formal control it exerted over its own colonies, British dominance of much of world trade meant that it effectively controlled the economies of many countries, such as China, Argentina and Siam.[79][80] Domestically, political attitudes favoured free trade and laissez-faire policies and a gradual widening of the voting franchise. During the century, the population increased at a dramatic rate, accompanied by rapid urbanisation, causing significant social and economic stresses.[81] After 1875, the UK's industrial monopoly was challenged by Germany and the USA. To seek new markets and sources of raw materials, the Conservative Party under Disraeli launched a period of imperialist expansion in Egypt, South Africa and elsewhere. Canada, Australia and New Zealand became self-governing dominions.[82]
Social reform and home rule for Ireland were important domestic issues after 1900. The Labour Party emerged from an alliance of trade unions and small Socialist groups in 1900, and suffragettes campaigned for women's right to vote before 1914.
Black-and-white photo of two dozen men in military uniforms and metal helmets sitting or standing in a muddy trench.
Infantry of the Royal Irish Rifles during the Battle of the Somme. More than 885,000 British soldiers died on the battlefields of World War I.
The UK fought with France, Russia and (after 1917) the US, against Germany and its allies in World War I (1914–18).[83] The UK armed forces were engaged across much of the British Empire and in several regions of Europe, particularly on the Western front.[84] The high fatalities of trench warfare caused the loss of much of a generation of men, with lasting social effects in the nation and a great disruption in the social order.
After the war, the UK received the League of Nations mandate over a number of former German and Ottoman colonies. The British Empire reached its greatest extent, covering a fifth of the world's land surface and a quarter of its population.[85] However, the UK had suffered 2.5 million casualties and finished the war with a huge national debt.[84] The rise of Irish Nationalism and disputes within Ireland over the terms of Irish Home Rule led eventually to the partition of the island in 1921,[86] and the Irish Free State became independent with Dominion status in 1922. Northern Ireland remained part of the United Kingdom.[87] A wave of strikes in the mid-1920s culminated in the UK General Strike of 1926. The UK had still not recovered from the effects of the war when the Great Depression (1929–32) occurred. This led to considerable unemployment and hardship in the old industrial areas, as well as political and social unrest in the 1930s. A coalition government was formed in 1931.[88]
The UK entered World War II by declaring war on Germany in 1939, after Germany had invaded Poland and Czechoslovakia. In 1940, Winston Churchill became prime minister and head of a coalition government. Despite the defeat of its European allies in the first year of the war, the UK continued the fight alone against Germany. In 1940, the RAF defeated the German Luftwaffe in a struggle for control of the skies in the Battle of Britain. The UK suffered heavy bombing during the Blitz. There were also eventual hard-fought victories in the Battle of the Atlantic, the North Africa campaign and the Burma campaign. UK forces played an important role in the Normandy landings of 1944, achieved with its ally the US. After Germany's defeat, the UK was one of the Big Three powers who met to plan the post-war world; it was an original signatory to the Declaration of the United Nations. The UK became one of the five permanent members of the United Nations Security Council. However, the war left the UK severely weakened and financially dependent on Marshall Aid and loans from the United States.[89]
Map of the world. Canada, the eastern United States, countries in east Africa, India, most of Australasia and some other countries are highlighted in pink.
Territories that were at one time part of the British Empire. Current British Overseas Territories are underlined in red.
In the immediate post-war years, the Labour government initiated a radical programme of reforms, which had a significant effect on British society in the following decades.[90] Major industries and public utilities were nationalised, a Welfare State was established, and a comprehensive, publicly funded healthcare system, the National Health Service, was created.[91] The rise of nationalism in the colonies coincided with Britain's now much-diminished economic position, so that a policy of decolonisation was unavoidable. Independence was granted to India and Pakistan in 1947.[92] Over the next three decades, most colonies of the British Empire gained their independence. Many became members of the Commonwealth of Nations.[93]
Although the UK was the third country to develop a nuclear weapons arsenal (with its first atomic bomb test in 1952), the new post-war limits of Britain's international role were illustrated by the Suez Crisis of 1956. The international spread of the English language ensured the continuing international influence of its literature and culture. From the 1960s onward, its popular culture was also influential abroad. As a result of a shortage of workers in the 1950s, the UK government encouraged immigration from Commonwealth countries. In the following decades, the UK became a multi-ethnic society.[94] Despite rising living standards in the late 1950s and 1960s, the UK's economic performance was not as successful as many of its competitors, such as West Germany and Japan. In 1973, the UK joined the European Economic Community (EEC), and when the EEC became the European Union (EU) in 1992, it was one of the 12 founding members.
After two French vetoes, in 1961 and 1967, the UK joined the European Economic Community in 1973. In a 1975 referendum, 67% of Britons voted in favour of remaining a member.
From the late 1960s, Northern Ireland suffered communal and paramilitary violence (sometimes affecting other parts of the UK) conventionally known as the Troubles. It is usually considered to have ended with the Belfast "Good Friday" Agreement of 1998.[95][96][97]
Following a period of widespread economic slowdown and industrial strife in the 1970s, the Conservative Government of the 1980s initiated a radical policy of monetarism, deregulation, particularly of the financial sector (for example, Big Bang in 1986) and labour markets, the sale of state-owned companies (privatisation), and the withdrawal of subsidies to others.[98] This resulted in high unemployment and social unrest, but ultimately also economic growth, particularly in the services sector. From 1984, the economy was helped by the inflow of substantial North Sea oil revenues.[99]
Around the end of the 20th century there were major changes to the governance of the UK with the establishment of devolved administrations for Scotland, Wales and Northern Ireland.[13][100] The statutory incorporation followed acceptance of the European Convention on Human Rights. The UK is still a key global player diplomatically and militarily. It plays leading roles in the EU, UN and NATO. However, controversy surrounds some of Britain's overseas military deployments, particularly in Afghanistan and Iraq.[101]
The 2008 global financial crisis severely affected the UK economy. The coalition government of 2010 introduced austerity measures intended to tackle the substantial public deficits which resulted.[102] In 2014 the Scottish Government held a referendum on Scottish independence, with the majority of voters rejecting the independence proposal and opting to remain within the United Kingdom.[103]
Geography
Main article: Geography of the United Kingdom
Map of United Kingdom showing hilly regions to north and west, and flattest region in the south-east.
The topography of the UK
The total area of the United Kingdom is approximately 243,610 square kilometres (94,060 sq mi). The country occupies the major part of the British Isles[104] archipelago and includes the island of Great Britain, the northeastern one-sixth of the island of Ireland and some smaller surrounding islands. It lies between the North Atlantic Ocean and the North Sea with the south-east coast coming within 22 miles (35 km) of the coast of northern France, from which it is separated by the English Channel.[105] In 1993 10% of the UK was forested, 46% used for pastures and 25% cultivated for agriculture.[106] The Royal Greenwich Observatory in London is the defining point of the Prime Meridian.[107]
The United Kingdom lies between latitudes 49° to 61° N, and longitudes 9° W to 2° E. Northern Ireland shares a 224-mile (360 km) land boundary with the Republic of Ireland.[105] The coastline of Great Britain is 11,073 miles (17,820 km) long.[108] It is connected to continental Europe by the Channel Tunnel, which at 31 miles (50 km) (24 miles (38 km) underwater) is the longest underwater tunnel in the world.[109]
England accounts for just over half of the total area of the UK, covering 130,395 square kilometres (50,350 sq mi).[110] Most of the country consists of lowland terrain,[106] with mountainous terrain north-west of the Tees-Exe line; including the Cumbrian Mountains of the Lake District, the Pennines and limestone hills of the Peak District, Exmoor and Dartmoor. The main rivers and estuaries are the Thames, Severn and the Humber. England's highest mountain is Scafell Pike (978 metres (3,209 ft)) in the Lake District. Its principal rivers are the Severn, Thames, Humber, Tees, Tyne, Tweed, Avon, Exe and Mersey.[106]
Scotland accounts for just under a third of the total area of the UK, covering 78,772 square kilometres (30,410 sq mi)[111] and including nearly eight hundred islands,[112] predominantly west and north of the mainland; notably the Hebrides, Orkney Islands and Shetland Islands. The topography of Scotland is distinguished by the Highland Boundary Fault – a geological rock fracture – which traverses Scotland from Arran in the west to Stonehaven in the east.[113] The faultline separates two distinctively different regions; namely the Highlands to the north and west and the lowlands to the south and east. The more rugged Highland region contains the majority of Scotland's mountainous land, including Ben Nevis which at 1,343 metres (4,406 ft) is the highest point in the British Isles.[114] Lowland areas – especially the narrow waist of land between the Firth of Clyde and the Firth of Forth known as the Central Belt – are flatter and home to most of the population including Glasgow, Scotland's largest city, and Edinburgh, its capital and political centre.
A view of Ben Nevis in the distance, fronted by rolling plains
Ben Nevis, in Scotland, is the highest point in the British Isles
Wales accounts for less than a tenth of the total area of the UK, covering 20,779 square kilometres (8,020 sq mi).[115] Wales is mostly mountainous, though South Wales is less mountainous than North and mid Wales. The main population and industrial areas are in South Wales, consisting of the coastal cities of Cardiff, Swansea and Newport, and the South Wales Valleys to their north. The highest mountains in Wales are in Snowdonia and include Snowdon (Welsh: Yr Wyddfa) which, at 1,085 metres (3,560 ft), is the highest peak in Wales.[106] The 14, or possibly 15, Welsh mountains over 3,000 feet (914 m) high are known collectively as the Welsh 3000s. Wales has over 2,704 kilometres (1,680 miles) of coastline.[116] Several islands lie off the Welsh mainland, the largest of which is Anglesey (Ynys Môn) in the northwest.
Northern Ireland, separated from Great Britain by the Irish Sea and North Channel, has an area of 14,160 square kilometres (5,470 sq mi) and is mostly hilly. It includes Lough Neagh which, at 388 square kilometres (150 sq mi), is the largest lake in the British Isles by area.[117] The highest peak in Northern Ireland is Slieve Donard in the Mourne Mountains at 852 metres (2,795 ft).[106]
Climate
Main article: Climate of the United Kingdom
The United Kingdom has a temperate climate, with plentiful rainfall all year round.[105] The temperature varies with the seasons, seldom dropping below −11 °C (12 °F) or rising above 35 °C (95 °F).[118] The prevailing wind is from the south-west and bears frequent spells of mild and wet weather from the Atlantic Ocean,[105] although the eastern parts are mostly sheltered from this wind; since the majority of the rain falls over the western regions, the eastern parts are therefore the driest. Atlantic currents, warmed by the Gulf Stream, bring mild winters, especially in the west, where winters are wet and even more so over high ground. Summers are warmest in the south-east of England, being closest to the European mainland, and coolest in the north. Heavy snowfall can occur in winter and early spring on high ground, and occasionally settles to great depth away from the hills.
Administrative divisions
Main article: Administrative geography of the United Kingdom
Each country of the United Kingdom has its own system of administrative and geographic demarcation, whose origins often pre-date the formation of the United Kingdom. Thus there is "no common stratum of administrative unit encompassing the United Kingdom".[119] Until the 19th century there was little change to those arrangements, but there has since been a constant evolution of role and function.[120] Change did not occur in a uniform manner and the devolution of power over local government to Scotland, Wales and Northern Ireland means that future changes are unlikely to be uniform either.
The organisation of local government in England is complex, with the distribution of functions varying according to local arrangements. Legislation concerning local government in England is the responsibility of the UK parliament and the Government of the United Kingdom, as England has no devolved parliament. The upper-tier subdivisions of England are the nine Government office regions or European Union government office regions.[121] One region, Greater London, has had a directly elected assembly and mayor since 2000 following popular support for the proposal in a referendum.[122] It was intended that other regions would also be given their own elected regional assemblies, but a proposed assembly in the North East region was rejected by a referendum in 2004.[123] Below the regional tier, some parts of England have county councils and district councils and others have unitary authorities; while London consists of 32 London boroughs and the City of London. Councillors are elected by the first-past-the-post system in single-member wards or by the multi-member plurality system in multi-member wards.[124]
For local government purposes, Scotland is divided into 32 council areas, with wide variation in both size and population. The cities of Glasgow, Edinburgh, Aberdeen and Dundee are separate council areas, as is the Highland Council which includes a third of Scotland's area but only just over 200,000 people. Local councils are made up of elected councillors, of whom there are currently 1,222;[125] they are paid a part-time salary. Elections are conducted by single transferable vote in multi-member wards that elect either three or four councillors. Each council elects a Provost, or Convenor, to chair meetings of the council and to act as a figurehead for the area. Councillors are subject to a code of conduct enforced by the Standards Commission for Scotland.[126] The representative association of Scotland's local authorities is the Convention of Scottish Local Authorities (COSLA).[127]
Local government in Wales consists of 22 unitary authorities. These include the cities of Cardiff, Swansea and Newport which are unitary authorities in their own right.[128] Elections are held every four years under the first-past-the-post system.[129] The most recent elections were held in May 2012, except for the Isle of Anglesey. The Welsh Local Government Association represents the interests of local authorities in Wales.[130]
Local government in Northern Ireland has since 1973 been organised into 26 district councils, each elected by single transferable vote. Their powers are limited to services such as collecting waste, controlling dogs and maintaining parks and cemeteries.[131] On 13 March 2008 the executive agreed on proposals to create 11 new councils and replace the present system.[132] The next local elections were postponed until 2016 to facilitate this.[133]
Dependencies
A view of the Caribbean Sea from the Cayman Islands, one of the world's foremost international financial centres[134] and tourist destinations.[135]
Main articles: British Overseas Territories, Crown dependencies and British Islands
The United Kingdom has sovereignty over seventeen territories which do not form part of the United Kingdom itself: fourteen British Overseas Territories[136] and three Crown dependencies.[137]
The fourteen British Overseas Territories are: Anguilla; Bermuda; the British Antarctic Territory; the British Indian Ocean Territory; the British Virgin Islands; the Cayman Islands; the Falkland Islands; Gibraltar; Montserrat; Saint Helena, Ascension and Tristan da Cunha; the Turks and Caicos Islands; the Pitcairn Islands; South Georgia and the South Sandwich Islands; and Sovereign Base Areas on Cyprus.[138] British claims in Antarctica are not universally recognised.[139] Collectively Britain's overseas territories encompass an approximate land area of 1,727,570 square kilometres (667,018 sq mi) and a population of approximately 260,000 people.[140] They are the remnants of the British Empire and several have specifically voted to remain British territories (Bermuda in 1995, Gibraltar in 2002 and the Falkland Islands in 2013).[141]
The Crown dependencies are possessions of the Crown, as opposed to overseas territories of the UK.[142] They comprise three independently administered jurisdictions: the Channel Islands of Jersey and Guernsey in the English Channel, and the Isle of Man in the Irish Sea. By mutual agreement, the British Government manages the islands' foreign affairs and defence and the UK Parliament has the authority to legislate on their behalf. However, internationally, they are regarded as "territories for which the United Kingdom is responsible".[143] The power to pass legislation affecting the islands ultimately rests with their own respective legislative assemblies, with the assent of the Crown (Privy Council or, in the case of the Isle of Man, in certain circumstances the Lieutenant-Governor).[144] Since 2005 each Crown dependency has had a Chief Minister as its head of government.[145]
Politics
Main articles: Politics of the United Kingdom, Monarchy of the United Kingdom and Elections in the United Kingdom
Elderly lady with a yellow hat and grey hair is smiling in outdoor setting.
Elizabeth II, Queen of the United Kingdom and the other Commonwealth realms
The United Kingdom is a unitary state under a constitutional monarchy. Queen Elizabeth II is the head of state of the UK as well as monarch of fifteen other independent Commonwealth countries. The monarch has "the right to be consulted, the right to encourage, and the right to warn".[146] The United Kingdom is one of only four countries in the world to have an uncodified constitution.[147][nb 7] The Constitution of the United Kingdom thus consists mostly of a collection of disparate written sources, including statutes, judge-made case law and international treaties, together with constitutional conventions. As there is no technical difference between ordinary statutes and "constitutional law", the UK Parliament can perform "constitutional reform" simply by passing Acts of Parliament, and thus has the political power to change or abolish almost any written or unwritten element of the constitution. However, no Parliament can pass laws that future Parliaments cannot change.[148]
Government
Main article: Government of the United Kingdom
The UK has a parliamentary government based on the Westminster system that has been emulated around the world: a legacy of the British Empire. The parliament of the United Kingdom that meets in the Palace of Westminster has two houses; an elected House of Commons and an appointed House of Lords. All bills passed are given Royal Assent before becoming law.
The position of prime minister,[nb 8] the UK's head of government,[149] belongs to the person most likely to command the confidence of the House of Commons; this individual is typically the leader of the political party or coalition of parties that holds the largest number of seats in that chamber. The prime minister chooses a cabinet and they are formally appointed by the monarch to form Her Majesty's Government. By convention, the Queen respects the prime minister's decisions of government.[150]
Large sand-coloured building of Gothic design beside brown river and road bridge. The building has several large towers, including large clock-tower.
The Palace of Westminster, seat of both houses of the Parliament of the United Kingdom
The cabinet is traditionally drawn from members of a prime minister's party or coalition and mostly from the House of Commons but always from both legislative houses, the cabinet being responsible to both. Executive power is exercised by the prime minister and cabinet, all of whom are sworn into the Privy Council of the United Kingdom, and become Ministers of the Crown. The current Prime Minister is David Cameron, who has been in office since 11 May 2010.[151] Cameron is the leader of the Conservative Party and heads a coalition with the Liberal Democrats. For elections to the House of Commons, the UK is currently divided into 650 constituencies,[152] each electing a single member of parliament (MP) by simple plurality. General elections are called by the monarch when the prime minister so advises. The Parliament Acts 1911 and 1949 require that a new election must be called no later than five years after the previous general election.[153]
The UK's three major political parties are the Conservative Party (Tories), the Labour Party and the Liberal Democrats, representing the British traditions of conservatism, socialism and social liberalism, respectively. During the 2010 general election these three parties won 622 out of 650 seats available in the House of Commons.[154][155] Most of the remaining seats were won by parties that contest elections only in one part of the UK: the Scottish National Party (Scotland only); Plaid Cymru (Wales only); and the Alliance Party, Democratic Unionist Party, Social Democratic and Labour Party and Sinn Féin (Northern Ireland only[nb 9]). In accordance with party policy, no elected Sinn Féin members of parliament have ever attended the House of Commons to speak on behalf of their constituents because of the requirement to take an oath of allegiance to the monarch.
Devolved administrations
Main articles: Devolution in the United Kingdom, Northern Ireland Executive, Scottish Government and Welsh Government
Modern one-story building with grass on roof and large sculpted grass area in front. Behind are residential buildings in a mixture of styles.
The Scottish Parliament Building in Holyrood is the seat of the Scottish Parliament.
Scotland, Wales and Northern Ireland each have their own government or executive, led by a First Minister (or, in the case of Northern Ireland, a diarchal First Minister and deputy First Minister), and a devolved unicameral legislature. England, the largest country of the United Kingdom, has no such devolved executive or legislature and is administered and legislated for directly by the UK government and parliament on all issues. This situation has given rise to the so-called West Lothian question which concerns the fact that members of parliament from Scotland, Wales and Northern Ireland can vote, sometimes decisively,[156] on matters that only affect England.[157] The McKay Commission reported on this matter in March 2013 recommending that laws affecting only England should need support from a majority of English members of parliament.[158]
The Scottish Government and Parliament have wide-ranging powers over any matter that has not been specifically reserved to the UK parliament, including education, healthcare, Scots law and local government.[159] At the 2011 elections the Scottish National Party won re-election and achieved an overall majority in the Scottish parliament, with its leader, Alex Salmond, as First Minister of Scotland.[160][161] In 2012, the UK and Scottish governments signed the Edinburgh Agreement setting out the terms for a referendum on Scottish independence in 2014, which was defeated 55% to 45%.
The Welsh Government and the National Assembly for Wales have more limited powers than those devolved to Scotland.[162] The Assembly is able to legislate on devolved matters through Acts of the Assembly, which require no prior consent from Westminster. The 2011 elections resulted in a minority Labour administration led by Carwyn Jones.[163]
The Northern Ireland Executive and Assembly have powers similar to those devolved to Scotland. The Executive is led by a diarchy representing unionist and nationalist members of the Assembly. Currently, Peter Robinson (Democratic Unionist Party) and Martin McGuinness (Sinn Féin) are First Minister and deputy First Minister respectively.[164] Devolution to Northern Ireland is contingent on participation by the Northern Ireland administration in the North-South Ministerial Council, where the Northern Ireland Executive cooperates and develops joint and shared policies with the Government of Ireland. The British and Irish governments co-operate on non-devolved matters affecting Northern Ireland through the British–Irish Intergovernmental Conference, which assumes the responsibilities of the Northern Ireland administration in the event of its non-operation.
The UK does not have a codified constitution and constitutional matters are not among the powers devolved to Scotland, Wales or Northern Ireland. Under the doctrine of parliamentary sovereignty, the UK Parliament could, in theory, therefore, abolish the Scottish Parliament, Welsh Assembly or Northern Ireland Assembly.[165][166] Indeed, in 1972, the UK Parliament unilaterally prorogued the Parliament of Northern Ireland, setting a precedent relevant to contemporary devolved institutions.[167] In practice, it would be politically difficult for the UK Parliament to abolish devolution to the Scottish Parliament and the Welsh Assembly, given the political entrenchment created by referendum decisions.[168] The political constraints placed upon the UK Parliament's power to interfere with devolution in Northern Ireland are even greater than in relation to Scotland and Wales, given that devolution in Northern Ireland rests upon an international agreement with the Government of Ireland.[169]
Law and criminal justice
Main article: Law of the United Kingdom
The Royal Courts of Justice of England and Wales
The United Kingdom does not have a single legal system, as Article 19 of the 1706 Treaty of Union provided for the continuation of Scotland's separate legal system.[170] Today the UK has three distinct systems of law: English law, Northern Ireland law and Scots law. A new Supreme Court of the United Kingdom came into being in October 2009 to replace the Appellate Committee of the House of Lords.[171][172] The Judicial Committee of the Privy Council, including the same members as the Supreme Court, is the highest court of appeal for several independent Commonwealth countries, the British Overseas Territories and the Crown Dependencies.[173]
Both English law, which applies in England and Wales, and Northern Ireland law are based on common-law principles.[174] The essence of common law is that, subject to statute, the law is developed by judges in courts, applying statute, precedent and common sense to the facts before them to give explanatory judgements of the relevant legal principles, which are reported and binding in future similar cases (stare decisis).[175] The courts of England and Wales are headed by the Senior Courts of England and Wales, consisting of the Court of Appeal, the High Court of Justice (for civil cases) and the Crown Court (for criminal cases). The Supreme Court is the highest court in the land for both criminal and civil appeal cases in England, Wales and Northern Ireland and any decision it makes is binding on every other court in the same jurisdiction, often having a persuasive effect in other jurisdictions.[176]
The High Court of Justiciary – the supreme criminal court of Scotland.
Scots law is a hybrid system based on both common-law and civil-law principles. The chief courts are the Court of Session, for civil cases,[177] and the High Court of Justiciary, for criminal cases.[178] The Supreme Court of the United Kingdom serves as the highest court of appeal for civil cases under Scots law.[179] Sheriff courts deal with most civil and criminal cases including conducting criminal trials with a jury, known as sheriff solemn court, or with a sheriff and no jury, known as sheriff summary Court.[180] The Scots legal system is unique in having three possible verdicts for a criminal trial: "guilty", "not guilty" and "not proven". Both "not guilty" and "not proven" result in an acquittal.[181]
Crime in England and Wales increased in the period between 1981 and 1995, though since that peak there has been an overall fall of 48% in crime from 1995 to 2007/08,[182] according to crime statistics. The prison population of England and Wales has almost doubled over the same period, to over 80,000, giving England and Wales the highest rate of incarceration in Western Europe at 147 per 100,000.[183] Her Majesty's Prison Service, which reports to the Ministry of Justice, manages most of the prisons within England and Wales. Crime in Scotland fell to its lowest recorded level for 32 years in 2009/10, falling by ten per cent.[184] At the same time Scotland's prison population, at over 8,000,[185] is at record levels and well above design capacity.[186] The Scottish Prison Service, which reports to the Cabinet Secretary for Justice, manages Scotland's prisons.
Foreign relations
Main article: Foreign relations of the United Kingdom
The Prime Minister of the United Kingdom, David Cameron, and the President of the United States, Barack Obama, during the 2010 G-20 Toronto summit.
The UK is a permanent member of the United Nations Security Council, a member of NATO, the Commonwealth of Nations, G7, G8, G20, the OECD, the WTO, the Council of Europe, the OSCE, and is a member state of the European Union. The UK is said to have a "Special Relationship" with the United States and a close partnership with France—the "Entente cordiale"—and shares nuclear weapons technology with both countries.[187][188] The UK is also closely linked with the Republic of Ireland; the two countries share a Common Travel Area and co-operate through the British-Irish Intergovernmental Conference and the British-Irish Council. Britain's global presence and influence is further amplified through its trading relations, foreign investments, official development assistance and military engagements.[189]
Military
Troopers of the Blues and Royals during the 2007 Trooping the Colour ceremony
Main article: British Armed Forces
The armed forces of the United Kingdom—officially, Her Majesty's Armed Forces—consist of three professional service branches: the Royal Navy and Royal Marines (forming the Naval Service), the British Army and the Royal Air Force.[190] The forces are managed by the Ministry of Defence and controlled by the Defence Council, chaired by the Secretary of State for Defence. The Commander-in-Chief is the British monarch, Elizabeth II, to whom members of the forces swear an oath of allegiance.[191] The Armed Forces are charged with protecting the UK and its overseas territories, promoting the UK's global security interests and supporting international peacekeeping efforts. They are active and regular participants in NATO, including the Allied Rapid Reaction Corps, as well as the Five Power Defence Arrangements, RIMPAC and other worldwide coalition operations. Overseas garrisons and facilities are maintained in Ascension Island, Belize, Brunei, Canada, Cyprus, Diego Garcia, the Falkland Islands, Germany, Gibraltar, Kenya and Qatar.[192]
The British armed forces played a key role in establishing the British Empire as the dominant world power in the 18th, 19th and early 20th centuries. Throughout its unique history the British forces have seen action in a number of major wars, such as the Seven Years' War, the Napoleonic Wars, the Crimean War, World War I and World War II—as well as many colonial conflicts. By emerging victorious from such conflicts, Britain has often been able to decisively influence world events. Since the end of the British Empire, the UK has nonetheless remained a major military power. Following the end of the Cold War, defence policy has a stated assumption that "the most demanding operations" will be undertaken as part of a coalition.[193] Setting aside the intervention in Sierra Leone, recent UK military operations in Bosnia, Kosovo, Afghanistan, Iraq and, most recently, Libya, have followed this approach. The last time the British military fought alone was the Falklands War of 1982.
According to various sources, including the Stockholm International Peace Research Institute and the International Institute for Strategic Studies, the United Kingdom has the fifth- or sixth-highest military expenditure in the world. Total defence spending currently accounts for around 2.4% of total national GDP.[22][23]
Economy
Main article: Economy of the United Kingdom
The Bank of England – the central bank of the United Kingdom
The UK has a partially regulated market economy.[194] Based on market exchange rates the UK is today the sixth-largest economy in the world and the third-largest in Europe after Germany and France, having fallen behind France for the first time in over a decade in 2008.[195] HM Treasury, led by the Chancellor of the Exchequer, is responsible for developing and executing the British government's public finance policy and economic policy. The Bank of England is the UK's central bank and is responsible for issuing notes and coins in the nation's currency, the pound sterling. Banks in Scotland and Northern Ireland retain the right to issue their own notes, subject to retaining enough Bank of England notes in reserve to cover their issue. Pound sterling is the world's third-largest reserve currency (after the US Dollar and the Euro).[196] Since 1997 the Bank of England's Monetary Policy Committee, headed by the Governor of the Bank of England, has been responsible for setting interest rates at the level necessary to achieve the overall inflation target for the economy that is set by the Chancellor each year.[197]
The UK service sector makes up around 73% of GDP.[198] London is one of the three "command centres" of the global economy (alongside New York City and Tokyo),[199] it is the world's largest financial centre alongside New York,[200][201][202] and it has the largest city GDP in Europe.[203] Edinburgh is also one of the largest financial centres in Europe.[204] Tourism is very important to the British economy and, with over 27 million tourists arriving in 2004, the United Kingdom is ranked as the sixth major tourist destination in the world and London has the most international visitors of any city in the world.[205][206] The creative industries accounted for 7% GVA in 2005 and grew at an average of 6% per annum between 1997 and 2005.[207]
The Airbus A350 has its wings and engines manufactured in the UK.
The Industrial Revolution started in the UK with an initial concentration on the textile industry,[208] followed by other heavy industries such as shipbuilding, coal mining and steelmaking.[209][210]
The empire was exploited as an overseas market for British products, allowing the UK to dominate international trade in the 19th century. As other nations industrialised, coupled with economic decline after two world wars, the United Kingdom began to lose its competitive advantage and heavy industry declined, by degrees, throughout the 20th century. Manufacturing remains a significant part of the economy but accounted for only 16.7% of national output in 2003.[211]
The automotive industry is a significant part of the UK manufacturing sector and employs over 800,000 people, with a turnover of some £52 billion, generating £26.6 billion of exports.[212]
The aerospace industry of the UK is the second- or third-largest national aerospace industry in the world depending upon the method of measurement and has an annual turnover of around £20 billion. The wings for the Airbus A380 and the A350 XWB are designed and manufactured at Airbus UK's world-leading Broughton facility, whilst over a quarter of the value of the Boeing 787 comes from UK manufacturers including Eaton (fuel subsystem pumps), Messier-Bugatti-Dowty (the landing gear) and Rolls-Royce (the engines). Other key names include GKN Aerospace – an expert in metallic and composite aerostructures that's involved in almost every civil and military fixed and rotary wing aircraft in production and development today.[213][214][215][216]
BAE Systems plays a critical role on some of the world's biggest defence aerospace projects. The company makes large sections of the Typhoon Eurofighter at its sub-assembly plant in Samlesbury and assembles the aircraft for the RAF at its Warton Plant, near Preston. It is also a principal subcontractor on the F35 Joint Strike Fighter - the world's largest single defence project - for which it designs and manufactures a range of components including the aft fuselage, vertical and horizontal tail and wing tips and fuel system. As well as this it manufactures the Hawk, the world's most successful jet training aircraft.[216] Airbus UK also manufactures the wings for the A400M military transporter. Rolls-Royce is the world's second-largest aero-engine manufacturer. Its engines power more than 30 types of commercial aircraft and it has more than 30,000 engines currently in service across both the civil and defence sectors. Agusta Westland designs and manufactures complete helicopters in the UK.[216]
The UK space industry is growing very fast. Worth £9.1bn in 2011 and employing 29,000 people, it is growing at a rate of some 7.5 per cent annually, according to its umbrella organisation, the UK Space Agency. Government strategy is for the space industry to be a £40bn business for the UK by 2030, capturing a 10 per cent share of the $250bn world market for commercial space technology.[216] On 16 July 2013, the British government pledged £60m to the Skylon project: this investment will provide support at a "crucial stage" to allow a full-scale prototype of the SABRE engine to be built.
The pharmaceutical industry plays an important role in the UK economy and the country has the third-highest share of global pharmaceutical R&D expenditures (after the United States and Japan).[217][218]
Agriculture is intensive, highly mechanised and efficient by European standards, producing about 60% of food needs with less than 1.6% of the labour force (535,000 workers).[219] Around two-thirds of production is devoted to livestock, one-third to arable crops. Farmers are subsidised by the EU's Common Agricultural Policy. The UK retains a significant, though much reduced fishing industry. It is also rich in a number of natural resources including coal, petroleum, natural gas, tin, limestone, iron ore, salt, clay, chalk, gypsum, lead, silica and an abundance of arable land.
The City of London is the world's largest financial centre alongside New York[200][201][202]
In the final quarter of 2008 the UK economy officially entered recession for the first time since 1991.[220] Unemployment increased from 5.2% in May 2008 to 7.6% in May 2009 and by January 2012 the unemployment rate among 18 to 24-year-olds had risen from 11.9% to 22.5%, the highest since current records began in 1992.[221][222] Total UK government debt rose from 44.4% of GDP in 2007 to 82.9% of GDP in 2011.[223] In February 2013, the UK lost its top AAA credit rating for the first time since 1978.[224]
Inflation-adjusted wages in the UK fell by 3.2% between the third quarter of 2010 and the third quarter of 2012.[225] Since the 1980s, economic inequality has grown faster in the UK than in any other developed country.[226]
The poverty line in the UK is commonly defined as being 60% of the median household income.[nb 10] In 2007–2008 13.5 million people, or 22% of the population, lived below this line. This is a higher level of relative poverty than all but four other EU members.[227] In the same year 4.0 million children, 31% of the total, lived in households below the poverty line after housing costs were taken into account. This is a decrease of 400,000 children since 1998–1999.[228] The UK imports 40% of its food supplies.[229] The Office for National Statistics has estimated that in 2011, 14 million people were at risk of poverty or social exclusion, and that one person in 20 (5.1%) was now experiencing "severe material deprivation,"[230] up from 3 million people in 1977.[231][232]
Science and technology
Main article: Science and technology in the United Kingdom
Charles Darwin (1809–82), whose theory of evolution by natural selection is the foundation of modern biological sciences
England and Scotland were leading centres of the Scientific Revolution from the 17th century[233] and the United Kingdom led the Industrial Revolution from the 18th century,[208] and has continued to produce scientists and engineers credited with important advances.[234] Major theorists from the 17th and 18th centuries include Isaac Newton, whose laws of motion and illumination of gravity have been seen as a keystone of modern science;[235] from the 19th century Charles Darwin, whose theory of evolution by natural selection was fundamental to the development of modern biology, and James Clerk Maxwell, who formulated classical electromagnetic theory; and more recently Stephen Hawking, who has advanced major theories in the fields of cosmology, quantum gravity and the investigation of black holes.[236] Major scientific discoveries from the 18th century include hydrogen by Henry Cavendish;[237] from the 20th century penicillin by Alexander Fleming,[238] and the structure of DNA, by Francis Crick and others.[239] Major engineering projects and applications by people from the UK in the 18th century include the steam locomotive, developed by Richard Trevithick and Andrew Vivian;[240] from the 19th century the electric motor by Michael Faraday, the incandescent light bulb by Joseph Swan,[241] and the first practical telephone, patented by Alexander Graham Bell;[242] and in the 20th century the world's first working television system by John Logie Baird and others,[243] the jet engine by Frank Whittle, the basis of the modern computer by Alan Turing, and the World Wide Web by Tim Berners-Lee.[244]
Scientific research and development remains important in British universities, with many establishing science parks to facilitate production and co-operation with industry.[245] Between 2004 and 2008 the UK produced 7% of the world's scientific research papers and had an 8% share of scientific citations, the third and second highest in the world (after the United States and China, and the United States, respectively).[246] Scientific journals produced in the UK include Nature, the British Medical Journal and The Lancet.[247]
Transport
Main article: Transport in the United Kingdom
Heathrow Terminal 5 building. London Heathrow Airport has the most international passenger traffic of any airport in the world.[248][249]
A radial road network totals 29,145 miles (46,904 km) of main roads, 2,173 miles (3,497 km) of motorways and 213,750 miles (344,000 km) of paved roads.[105] In 2009 there were a total of 34 million licensed vehicles in Great Britain.[250]
The UK has a railway network of 10,072 miles (16,209 km) in Great Britain and 189 miles (304 km) in Northern Ireland. Railways in Northern Ireland are operated by NI Railways, a subsidiary of state-owned Translink. In Great Britain, the British Rail network was privatised between 1994 and 1997. Network Rail owns and manages most of the fixed assets (tracks, signals etc.). About 20 privately owned Train Operating Companies (some of them owned by foreign state railways, including Deutsche Bahn, SNCF and Nederlandse Spoorwegen), along with the state-owned East Coast, run over 18,000 passenger trains daily. There are also some 1,000 freight trains in daily operation.[105] The UK government is to spend £30 billion on a new high-speed railway line, HS2, to be operational by 2025.[251] Crossrail, under construction in London, is Europe's largest construction project with a £15 billion projected cost.[252][253]
In the year from October 2009 to September 2010 UK airports handled a total of 211.4 million passengers.[254] In that period the three largest airports were London Heathrow Airport (65.6 million passengers), Gatwick Airport (31.5 million passengers) and London Stansted Airport (18.9 million passengers).[254] London Heathrow Airport, located 15 miles (24 km) west of the capital, has the most international passenger traffic of any airport in the world[248][249] and is the hub for the UK flag carrier British Airways, as well as for BMI and Virgin Atlantic.[255]
Energy
Main article: Energy in the United Kingdom
An oil platform in the North Sea
In 2006, the UK was the world's ninth-largest consumer of energy and the 15th-largest producer.[256] The UK is home to a number of large energy companies, including two of the six oil and gas "supermajors" – BP and Royal Dutch Shell – and BG Group.[257][258] In 2011, 40% of the UK's electricity was produced by gas, 30% by coal, 19% by nuclear power and 4.2% by wind, hydro, biofuels and wastes.[259]
In 2009, the UK produced 1.5 million barrels per day (bbl/d) of oil and consumed 1.7 million bbl/d.[260] Production is now in decline and the UK has been a net importer of oil since 2005.[260] In 2010 the UK had around 3.1 billion barrels of proven crude oil reserves, the largest of any EU member state.[260] In 2009, 66.5% of the UK's oil supply was imported.[261]
In 2009, the UK was the 13th-largest producer of natural gas in the world and the largest producer in the EU.[262] Production is now in decline and the UK has been a net importer of natural gas since 2004.[262] In 2009, half of British gas was supplied from imports and this is expected to increase to at least 75% by 2015, as domestic reserves are depleted.[259]
Coal production played a key role in the UK economy in the 19th and 20th centuries. In the mid-1970s, 130 million tonnes of coal was being produced annually, not falling below 100 million tonnes until the early 1980s. During the 1980s and 1990s the industry was scaled back considerably. In 2011, the UK produced 18.3 million tonnes of coal.[263] In 2005 it had proven recoverable coal reserves of 171 million tons.[263] The UK Coal Authority has stated there is a potential to produce between 7 billion tonnes and 16 billion tonnes of coal through underground coal gasification (UCG) or 'fracking',[264] and that, based on current UK coal consumption, such reserves could last between 200 and 400 years.[265] However, environmental and social concerns have been raised over chemicals getting into the water table and minor earthquakes damaging homes.[266][267]
In the late 1990s, nuclear power plants contributed around 25% of total annual electricity generation in the UK, but this has gradually declined as old plants have been shut down and ageing-related problems affect plant availability. In 2012, the UK had 16 reactors normally generating about 19% of its electricity. All but one of the reactors will be retired by 2023. Unlike Germany and Japan, the UK intends to build a new generation of nuclear plants from about 2018.[259]
Demographics
Main article: Demographics of the United Kingdom
Map of population density in the UK as at the 2011 census.
A census is taken simultaneously in all parts of the UK every ten years.[268] The Office for National Statistics is responsible for collecting data for England and Wales, the General Register Office for Scotland and the Northern Ireland Statistics and Research Agency each being responsible for censuses in their respective countries.[269] In the 2011 census the total population of the United Kingdom was 63,181,775.[270] It is the third-largest in the European Union, the fifth-largest in the Commonwealth and the 21st-largest in the world. 2010 was the third successive year in which natural change contributed more to population growth than net long-term international migration.[271][271] Between 2001 and 2011 the population increased by an average annual rate of approximately 0.7 per cent.[270] This compares to 0.3 per cent per year in the period 1991 to 2001 and 0.2 per cent in the decade 1981 to 1991.[271] The 2011 census also confirmed that the proportion of the population aged 0–14 has nearly halved (31 per cent in 1911 compared to 18 in 2011) and the proportion of older people aged 65 and over has more than trebled (from 5 to 16 per cent).[270] It has been estimated that the number of people aged 100 or over will rise steeply to reach over 626,000 by 2080.[272]
England's population in 2011 was found to be 53 million.[273] It is one of the most densely populated countries in the world, with 383 people resident per square kilometre in mid-2003,[274] with a particular concentration in London and the south-east.[275] The 2011 census put Scotland's population at 5.3 million,[276] Wales at 3.06 million and Northern Ireland at 1.81 million.[273] In percentage terms England has had the fastest growing population of any country of the UK in the period from 2001 to 2011, with an increase of 7.9%.
In 2012 the average total fertility rate (TFR) across the UK was 1.92 children per woman.[277] While a rising birth rate is contributing to current population growth, it remains considerably below the 'baby boom' peak of 2.95 children per woman in 1964,[278] below the replacement rate of 2.1, but higher than the 2001 record low of 1.63.[277] In 2012, Scotland had the lowest TFR at only 1.67, followed by Wales at 1.88, England at 1.94, and Northern Ireland at 2.03.[277] In 2011, 47.3% of births in the UK were to unmarried women.[279] A government figure estimated that there are 3.6 million homosexual people in Britain comprising 6 per cent of the population.[280]
view talk edit
view talk edit
Largest urban areas of the United Kingdom
United Kingdom 2011 census Built-up areas[281][282][283]
Rank Urban area Pop. Principal settlement Rank Urban area Pop. Principal settlement
Greater London Urban Area
Greater London Urban Area
Greater Manchester Urban Area
Greater Manchester Urban Area
1 Greater London Urban Area 9,787,426 London 11 Bristol Urban Area 617,280 Bristol West Midlands Urban Area
West Midlands Urban Area
West Yorkshire Urban Area
West Yorkshire Urban Area
2 Greater Manchester Urban Area 2,553,379 Manchester 12 Belfast Metropolitan Urban Area 579,236 Belfast
3 West Midlands Urban Area 2,440,986 Birmingham 13 Leicester Urban Area 508,916 Leicester
4 West Yorkshire Urban Area 1,777,934 Leeds 14 Edinburgh 488,610 Edinburgh
5 Greater Glasgow 976,970 Glasgow 15 Brighton/Worthing/Littlehampton 474,485 Brighton
6 Liverpool Urban Area 864,122 Liverpool 16 South East Dorset conurbation 466,266 Bournemouth
7 South Hampshire 855,569 Southampton 17 Cardiff Urban Area 390,214 Cardiff
8 Tyneside 774,891 Newcastle 18 Teesside 376,633 Middlesbrough
9 Nottingham Urban Area 729,977 Nottingham 19 The Potteries Urban Area 372,775 Stoke-on-Trent
10 Sheffield Urban Area 685,368 Sheffield 20 Coventry and Bedworth Urban Area 359,262 Coventry
Ethnic groups
Map showing the percentage of the population who are not white according to the 2011 census.
Ethnic group 2011
population 2011
%
White 55,010,359 87.1
White: Irish Traveller 63,193 0.1
Asian or Asian British: Indian 1,451,862
2.3
Asian or Asian British: Pakistani 1,173,892
1.9
Asian or Asian British: Bangladeshi 451,529
0.7
Asian or Asian British: Chinese 433,150
0.7
Asian or Asian British: Asian Other 861,815
1.4
Asian or Asian British: Total 4,373,339
7.0
Black or Black British 1,904,684
3.0
British Mixed 1,250,229
2.0
Other: Total 580,374
0.9
Total[284] 63,182,178
100
Historically, indigenous British people were thought to be descended from the various ethnic groups that settled there before the 11th century: the Celts, Romans, Anglo-Saxons, Norse and the Normans. Welsh people could be the oldest ethnic group in the UK.[285] A 2006 genetic study shows that more than 50 per cent of England's gene pool contains Germanic Y chromosomes.[286] Another 2005 genetic analysis indicates that "about 75 per cent of the traceable ancestors of the modern British population had arrived in the British isles by about 6,200 years ago, at the start of the British Neolithic or Stone Age", and that the British broadly share a common ancestry with the Basque people.[287][288][289]
The UK has a history of small-scale non-white immigration, with Liverpool having the oldest Black population in the country dating back to at least the 1730s during the period of the African slave trade,[290] and the oldest Chinese community in Europe, dating to the arrival of Chinese seamen in the 19th century.[291] In 1950 there were probably fewer than 20,000 non-white residents in Britain, almost all born overseas.[292]
Since 1948 substantial immigration from Africa, the Caribbean and South Asia has been a legacy of ties forged by the British Empire. Migration from new EU member states in Central and Eastern Europe since 2004 has resulted in growth in these population groups but, as of 2008, the trend is reversing. Many of these migrants are returning to their home countries, leaving the size of these groups unknown.[293] In 2011, 87.1% of the population identified themselves as White, meaning 12.9% of the UK population identified themselves as belonging to a mixed or other ethnic minority group.
Ethnic diversity varies significantly across the UK. 30.4% of London's population and 37.4% of Leicester's was estimated to be non-white in 2005,[294][295] whereas less than 5% of the populations of North East England, Wales and the South West were from ethnic minorities, according to the 2001 census.[296] In 2011, 26.5% of primary and 22.2% of secondary pupils at state schools in England were members of an ethnic minority.[297]
The non-white British population of England and Wales increased by 38% from 6.6 million in 2001 to 9.1 million in 2009.[298] The fastest-growing group was the mixed-ethnicity population, which grew by almost half from 672,000 in 2001 to 986,600 in 2009. Also in the same period, a decrease of 36,000 white British people was recorded.[299]
Languages
Main article: Languages of the United Kingdom
The English-speaking world. Countries in dark blue have a majority of native speakers; countries where English is an official but not a majority language are shaded in light blue. English is one of the official languages of the European Union[300] and the United Nations[301]
The UK's de facto official language is English.[302][303] It is estimated that 95% of the UK's population are monolingual English speakers.[304] 5.5% of the population are estimated to speak languages brought to the UK as a result of relatively recent immigration.[304] South Asian languages, including Bengali, Tamil, Punjabi, Hindi and Gujarati, are the largest grouping and are spoken by 2.7% of the UK population.[304] According to the 2011 census, Polish has become the second-largest language spoken in England and has 546,000 speakers.[305]
Four Celtic languages are spoken in the UK: Welsh; Irish; Scottish Gaelic; and Cornish. All are recognised as regional or minority languages, subject to specific measures of protection and promotion under the European Charter for Regional or Minority Languages[2][306] and the Framework Convention for the Protection of National Minorities.[307] In the 2001 Census over a fifth (21%) of the population of Wales said they could speak Welsh,[308] an increase from the 1991 Census (18%).[309] In addition it is estimated that about 200,000 Welsh speakers live in England.[310] In the same census in Northern Ireland 167,487 people (10.4%) stated that they had "some knowledge of Irish" (see Irish language in Northern Ireland), almost exclusively in the nationalist (mainly Catholic) population. Over 92,000 people in Scotland (just under 2% of the population) had some Gaelic language ability, including 72% of those living in the Outer Hebrides.[311] The number of schoolchildren being taught through Welsh, Scottish Gaelic and Irish is increasing.[312] Among emigrant-descended populations some Scottish Gaelic is still spoken in Canada (principally Nova Scotia and Cape Breton Island),[313] and Welsh in Patagonia, Argentina.[314]
Scots, a language descended from early northern Middle English, has limited recognition alongside its regional variant, Ulster Scots in Northern Ireland, without specific commitments to protection and promotion.[2][315]
It is compulsory for pupils to study a second language up to the age of 14 in England,[316] and up to age 16 in Scotland. French and German are the two most commonly taught second languages in England and Scotland. All pupils in Wales are taught Welsh as a second language up to age 16, or are taught in Welsh.[317]
Religion
Main article: Religion in the United Kingdom
Westminster Abbey is used for the coronation of British monarchs
Forms of Christianity have dominated religious life in what is now the United Kingdom for over 1,400 years.[318] Although a majority of citizens still identify with Christianity in many surveys, regular church attendance has fallen dramatically since the middle of the 20th century,[319] while immigration and demographic change have contributed to the growth of other faiths, most notably Islam.[320] This has led some commentators to variously describe the UK as a multi-faith,[321] secularised,[322] or post-Christian society.[323]
In the 2001 census 71.6% of all respondents indicated that they were Christians, with the next largest faiths (by number of adherents) being Islam (2.8%), Hinduism (1.0%), Sikhism (0.6%), Judaism (0.5%), Buddhism (0.3%) and all other religions (0.3%).[324] 15% of respondents stated that they had no religion, with a further 7% not stating a religious preference.[325] A Tearfund survey in 2007 showed only one in ten Britons actually attend church weekly.[326] Between the 2001 and 2011 censuses there was a decrease in the number of people who identified as Christian by 12%, whilst the percentage of those reporting no religious affiliation doubled. This contrasted with growth in the other main religious group categories, with the number of Muslims increasing by the most substantial margin to a total of about 5%.[327]
The Church of England is the established church in England.[328] It retains a representation in the UK Parliament and the British monarch is its Supreme Governor.[329] In Scotland the Presbyterian Church of Scotland is recognised as the national church. It is not subject to state control, and the British monarch is an ordinary member, required to swear an oath to "maintain and preserve the Protestant Religion and Presbyterian Church Government" upon his or her accession.[330][331] The (Anglican) Church in Wales was disestablished in 1920 and, as the (Anglican) Church of Ireland was disestablished in 1870 before the partition of Ireland, there is no established church in Northern Ireland.[332] Although there are no UK-wide data in the 2001 census on adherence to individual Christian denominations, it has been estimated that 62% of Christians are Anglican, 13.5% Catholic, 6% Presbyterian, 3.4% Methodist with small numbers of other Protestant denominations such as Open Brethren, and Orthodox churches.[333]
Migration
Main article: Immigration to the United Kingdom since 1922
See also: Foreign-born population of the United Kingdom
Estimated foreign-born population by country of birth, April 2007 – March 2008
The United Kingdom has experienced successive waves of migration. The Great Famine in Ireland, then part of the United Kingdom, resulted in perhaps a million people migrating to Great Britain.[334] Unable to return to Poland at the end of World War II, over 120,000 Polish veterans remained in the UK permanently.[335] After World War II, there was significant immigration from the colonies and newly independent former colonies, partly as a legacy of empire and partly driven by labour shortages. Many of these migrants came from the Caribbean and the Indian subcontinent.[336] The British Asian population has increased from 2.2 million in 2001 to over 4.2 million in 2011.[337]
One of the more recent trends in migration has been the arrival of workers from the new EU member states in Eastern Europe. In 2010, there were 7.0 million foreign-born residents in the UK, corresponding to 11.3% of the total population. Of these, 4.76 million (7.7%) were born outside the EU and 2.24 million (3.6%) were born in another EU Member State.[338] The proportion of foreign-born people in the UK remains slightly below that of many other European countries.[339] However, immigration is now contributing to a rising population[340] with arrivals and UK-born children of migrants accounting for about half of the population increase between 1991 and 2001. Analysis of Office for National Statistics (ONS) data shows that a net total of 2.3 million migrants moved to the UK in the 15 years from 1991 to 2006.[341][342] In 2008 it was predicted that migration would add 7 million to the UK population by 2031,[343] though these figures are disputed.[344] The ONS reported that net migration rose from 2009 to 2010 by 21 per cent to 239,000.[345] In 2011 the net increase was 251,000: immigration was 589,000, while the number of people emigrating (for more than 12 months) was 338,000.[346][347]
195,046 foreign nationals became British citizens in 2010,[348] compared to 54,902 in 1999.[348][349] A record 241,192 people were granted permanent settlement rights in 2010, of whom 51 per cent were from Asia and 27 per cent from Africa.[350] 25.5 per cent of babies born in England and Wales in 2011 were born to mothers born outside the UK, according to official statistics released in 2012.[351]
Citizens of the European Union, including those of the UK, have the right to live and work in any EU member state.[352] The UK applied temporary restrictions to citizens of Romania and Bulgaria, which joined the EU in January 2007.[353] Research conducted by the Migration Policy Institute for the Equality and Human Rights Commission suggests that, between May 2004 and September 2009, 1.5 million workers migrated from the new EU member states to the UK, two-thirds of them Polish, but that many subsequently returned home, resulting in a net increase in the number of nationals of the new member states in the UK of some 700,000 over that period.[354][355] The late-2000s recession in the UK reduced the economic incentive for Poles to migrate to the UK,[356] the migration becoming temporary and circular.[357] In 2009, for the first time since enlargement, more nationals of the eight central and eastern European states that had joined the EU in 2004 left the UK than arrived.[358] In 2011, citizens of the new EU member states made up 13% of the immigrants entering the country.[346]
Estimated number of British citizens living overseas by country, 2006
The UK government has introduced a points-based immigration system for immigration from outside the European Economic Area to replace former schemes, including the Scottish Government's Fresh Talent Initiative.[359] In June 2010 the UK government introduced a temporary limit of 24,000 on immigration from outside the EU, aiming to discourage applications before a permanent cap was imposed in April 2011.[360] The cap has caused tension within the coalition: business secretary Vince Cable has argued that it is harming British businesses.[361]
Emigration was an important feature of British society in the 19th century. Between 1815 and 1930 around 11.4 million people emigrated from Britain and 7.3 million from Ireland. Estimates show that by the end of the 20th century some 300 million people of British and Irish descent were permanently settled around the globe.[362] Today, at least 5.5 million UK-born people live abroad,[363][364][365] mainly in Australia, Spain, the United States and Canada.[363][366]
Education
Main article: Education in the United Kingdom
See also: Education in England, Education in Northern Ireland, Education in Scotland and Education in Wales
King's College, part of the University of Cambridge, which was founded in 1209
Education in the United Kingdom is a devolved matter, with each country having a separate education system.
Whilst education in England is the responsibility of the Secretary of State for Education, the day-to-day administration and funding of state schools is the responsibility of local authorities.[367] Universally free of charge state education was introduced piecemeal between 1870 and 1944.[368][369] Education is now mandatory from ages five to sixteen (15 if born in late July or August). In 2011, the Trends in International Mathematics and Science Study (TIMSS) rated 13–14-year-old pupils in England and Wales 10th in the world for maths and 9th for science.[370] The majority of children are educated in state-sector schools, a small proportion of which select on the grounds of academic ability. Two of the top ten performing schools in terms of GCSE results in 2006 were state-run grammar schools. Over half of students at the leading universities of Cambridge and Oxford had attended state schools.[371] Despite a fall in actual numbers the proportion of children in England attending private schools has risen to over 7%.[372] In 2010, more than 45% of places at the University of Oxford and 40% at the University of Cambridge were taken by students from private schools, even though they educate just 7% of the population.[373] England has the two oldest universities in the English-speaking world, the Universities of Oxford and Cambridge (jointly known as "Oxbridge"), with a history of over eight centuries. The United Kingdom has 9 universities featured in the Times Higher Education top 100 rankings, making it second to the United States in terms of representation.[374]
Queen's University Belfast, built in 1849[375]
Education in Scotland is the responsibility of the Cabinet Secretary for Education and Lifelong Learning, with day-to-day administration and funding of state schools the responsibility of Local Authorities. Two non-departmental public bodies have key roles in Scottish education. The Scottish Qualifications Authority is responsible for the development, accreditation, assessment and certification of qualifications other than degrees which are delivered at secondary schools, post-secondary colleges of further education and other centres.[376] The Learning and Teaching Scotland provides advice, resources and staff development to education professionals.[377] Scotland first legislated for compulsory education in 1496.[378] The proportion of children in Scotland attending private schools is just over 4%, and it has been rising slowly in recent years.[379] Scottish students who attend Scottish universities pay neither tuition fees nor graduate endowment charges, as fees were abolished in 2001 and the graduate endowment scheme was abolished in 2008.[380]
The Welsh Government has responsibility for education in Wales. A significant number of Welsh students are taught either wholly or largely in the Welsh language; lessons in Welsh are compulsory for all until the age of 16.[381] There are plans to increase the provision of Welsh-medium schools as part of the policy of creating a fully bilingual Wales.
Education in Northern Ireland is the responsibility of the Minister of Education and the Minister for Employment and Learning, although responsibility at a local level is administered by five education and library boards covering different geographical areas. The Council for the Curriculum, Examinations & Assessment (CCEA) is the body responsible for advising the government on what should be taught in Northern Ireland's schools, monitoring standards and awarding qualifications.[382]
A government commission's report in 2014 found that privately educated people comprise 7% of the general population of the UK but much larger percentages of the top professions, the most extreme case quoted being 71% of senior judges.[383][384]
Healthcare
Main article: Healthcare in the United Kingdom
The Royal Aberdeen Children's Hospital, an NHS Scotland specialist children's hospital
Healthcare in the United Kingdom is a devolved matter and each country has its own system of private and publicly funded health care, together with alternative, holistic and complementary treatments. Public healthcare is provided to all UK permanent residents and is mostly free at the point of need, being paid for from general taxation. The World Health Organization, in 2000, ranked the provision of healthcare in the United Kingdom as fifteenth best in Europe and eighteenth in the world.[385][386]
Regulatory bodies are organised on a UK-wide basis such as the General Medical Council, the Nursing and Midwifery Council and non-governmental-based, such as the Royal Colleges. However, political and operational responsibility for healthcare lies with four national executives; healthcare in England is the responsibility of the UK Government; healthcare in Northern Ireland is the responsibility of the Northern Ireland Executive; healthcare in Scotland is the responsibility of the Scottish Government; and healthcare in Wales is the responsibility of the Welsh Assembly Government. Each National Health Service has different policies and priorities, resulting in contrasts.[387][388]
Since 1979 expenditure on healthcare has been increased significantly to bring it closer to the European Union average.[389] The UK spends around 8.4 per cent of its gross domestic product on healthcare, which is 0.5 percentage points below the Organisation for Economic Co-operation and Development average and about one percentage point below the average of the European Union.[390]
Culture
Main article: Culture of the United Kingdom
The culture of the United Kingdom has been influenced by many factors including: the nation's island status; its history as a western liberal democracy and a major power; as well as being a political union of four countries with each preserving elements of distinctive traditions, customs and symbolism. As a result of the British Empire, British influence can be observed in the language, culture and legal systems of many of its former colonies including Australia, Canada, India, Ireland, New Zealand, South Africa and the United States. The substantial cultural influence of the United Kingdom has led it to be described as a "cultural superpower."[391][392]
Literature
Main article: British literature
The Chandos portrait, believed to depict William Shakespeare
'British literature' refers to literature associated with the United Kingdom, the Isle of Man and the Channel Islands. Most British literature is in the English language. In 2005, some 206,000 books were published in the United Kingdom and in 2006 it was the largest publisher of books in the world.[393]
The English playwright and poet William Shakespeare is widely regarded as the greatest dramatist of all time,[394][395][396] and his contemporaries Christopher Marlowe and Ben Jonson have also been held in continuous high esteem. More recently the playwrights Alan Ayckbourn, Harold Pinter, Michael Frayn, Tom Stoppard and David Edgar have combined elements of surrealism, realism and radicalism.
Notable pre-modern and early-modern English writers include Geoffrey Chaucer (14th century), Thomas Malory (15th century), Sir Thomas More (16th century), John Bunyan (17th century) and John Milton (17th century). In the 18th century Daniel Defoe (author of Robinson Crusoe) and Samuel Richardson were pioneers of the modern novel. In the 19th century there followed further innovation by Jane Austen, the gothic novelist Mary Shelley, the children's writer Lewis Carroll, the Brontë sisters, the social campaigner Charles Dickens, the naturalist Thomas Hardy, the realist George Eliot, the visionary poet William Blake and romantic poet William Wordsworth. 20th-century English writers include the science-fiction novelist H. G. Wells; the writers of children's classics Rudyard Kipling, A. A. Milne (the creator of Winnie-the-Pooh), Roald Dahl and Enid Blyton; the controversial D. H. Lawrence; the modernist Virginia Woolf; the satirist Evelyn Waugh; the prophetic novelist George Orwell; the popular novelists W. Somerset Maugham and Graham Greene; the crime writer Agatha Christie (the best-selling novelist of all time);[397] Ian Fleming (the creator of James Bond); the poets T.S. Eliot, Philip Larkin and Ted Hughes; the fantasy writers J. R. R. Tolkien, C. S. Lewis and J. K. Rowling; the graphic novelist Alan Moore, whose novel Watchmen is often cited by critics as comic's greatest series and graphic novel[398] and one of the best-selling graphic novels ever published.[399]
A photograph of Victorian era novelist Charles Dickens
Scotland's contributions include the detective writer Arthur Conan Doyle (the creator of Sherlock Holmes), romantic literature by Sir Walter Scott, the children's writer J. M. Barrie, the epic adventures of Robert Louis Stevenson and the celebrated poet Robert Burns. More recently the modernist and nationalist Hugh MacDiarmid and Neil M. Gunn contributed to the Scottish Renaissance. A more grim outlook is found in Ian Rankin's stories and the psychological horror-comedy of Iain Banks. Scotland's capital, Edinburgh, was UNESCO's first worldwide City of Literature.[400]
Britain's oldest known poem, Y Gododdin, was composed in Yr Hen Ogledd (The Old North), most likely in the late 6th century. It was written in Cumbric or Old Welsh and contains the earliest known reference to King Arthur.[401] From around the seventh century, the connection between Wales and the Old North was lost, and the focus of Welsh-language culture shifted to Wales, where Arthurian legend was further developed by Geoffrey of Monmouth.[402] Wales's most celebrated medieval poet, Dafydd ap Gwilym (fl.1320–1370), composed poetry on themes including nature, religion and especially love. He is widely regarded as one of the greatest European poets of his age.[403] Until the late 19th century the majority of Welsh literature was in Welsh and much of the prose was religious in character. Daniel Owen is credited as the first Welsh-language novelist, publishing Rhys Lewis in 1885. The best-known of the Anglo-Welsh poets are both Thomases. Dylan Thomas became famous on both sides of the Atlantic in the mid-20th century. He is remembered for his poetry – his "Do not go gentle into that good night; Rage, rage against the dying of the light." is one of the most quoted couplets of English language verse – and for his 'play for voices', Under Milk Wood. The influential Church in Wales 'poet-priest' and Welsh nationalist R. S. Thomas was nominated for the Nobel Prize in Literature in 1996. Leading Welsh novelists of the twentieth century include Richard Llewellyn and Kate Roberts.[404][405]
Authors of other nationalities, particularly from Commonwealth countries, the Republic of Ireland and the United States, have lived and worked in the UK. Significant examples through the centuries include Jonathan Swift, Oscar Wilde, Bram Stoker, George Bernard Shaw, Joseph Conrad, T.S. Eliot, Ezra Pound and more recently British authors born abroad such as Kazuo Ishiguro and Sir Salman Rushdie.[406][407]
Music
Main article: Music of the United Kingdom
See also: British rock
The Beatles are the most commercially successful and critically acclaimed band in the history of music, selling over a billion records internationally.[408][409][410]
Various styles of music are popular in the UK from the indigenous folk music of England, Wales, Scotland and Northern Ireland to heavy metal. Notable composers of classical music from the United Kingdom and the countries that preceded it include William Byrd, Henry Purcell, Sir Edward Elgar, Gustav Holst, Sir Arthur Sullivan (most famous for working with the librettist Sir W. S. Gilbert), Ralph Vaughan Williams and Benjamin Britten, pioneer of modern British opera. Sir Peter Maxwell Davies is one of the foremost living composers and current Master of the Queen's Music. The UK is also home to world-renowned symphonic orchestras and choruses such as the BBC Symphony Orchestra and the London Symphony Chorus. Notable conductors include Sir Simon Rattle, John Barbirolli and Sir Malcolm Sargent. Some of the notable film score composers include John Barry, Clint Mansell, Mike Oldfield, John Powell, Craig Armstrong, David Arnold, John Murphy, Monty Norman and Harry Gregson-Williams. George Frideric Handel, although born German, was a naturalised British citizen[411] and some of his best works, such as Messiah, were written in the English language.[412] Andrew Lloyd Webber has achieved enormous worldwide commercial success and is a prolific composer of musical theatre, works which have dominated London's West End for a number of years and have travelled to Broadway in New York.[413]
The Beatles have international sales of over one billion units and are the biggest-selling and most influential band in the history of popular music.[408][409][410][414] Other prominent British contributors to have influenced popular music over the last 50 years include The Rolling Stones, Led Zeppelin, Pink Floyd, Queen, the Bee Gees, and Elton John, all of whom have worldwide record sales of 200 million or more.[415][416][417][418][419][420] The Brit Awards are the BPI's annual music awards, and some of the British recipients of the Outstanding Contribution to Music award include The Who, David Bowie, Eric Clapton, Rod Stewart and The Police.[421] More recent UK music acts that have had international success include Coldplay, Radiohead, Oasis, Spice Girls, Robbie Williams, Amy Winehouse and Adele.[422]
A number of UK cities are known for their music. Acts from Liverpool have had more UK chart number one hit singles per capita (54) than any other city worldwide.[423] Glasgow's contribution to music was recognised in 2008 when it was named a UNESCO City of Music, one of only three cities in the world to have this honour.[424]
Visual art
Main article: Art of the United Kingdom
J. M. W. Turner self-portrait, oil on canvas, c. 1799
The history of British visual art forms part of western art history. Major British artists include: the Romantics William Blake, John Constable, Samuel Palmer and J.M.W. Turner; the portrait painters Sir Joshua Reynolds and Lucian Freud; the landscape artists Thomas Gainsborough and L. S. Lowry; the pioneer of the Arts and Crafts Movement William Morris; the figurative painter Francis Bacon; the Pop artists Peter Blake, Richard Hamilton and David Hockney; the collaborative duo Gilbert and George; the abstract artist Howard Hodgkin; and the sculptors Antony Gormley, Anish Kapoor and Henry Moore. During the late 1980s and 1990s the Saatchi Gallery in London helped to bring to public attention a group of multi-genre artists who would become known as the "Young British Artists": Damien Hirst, Chris Ofili, Rachel Whiteread, Tracey Emin, Mark Wallinger, Steve McQueen, Sam Taylor-Wood and the Chapman Brothers are among the better-known members of this loosely affiliated movement.
The Royal Academy in London is a key organisation for the promotion of the visual arts in the United Kingdom. Major schools of art in the UK include: the six-school University of the Arts London, which includes the Central Saint Martins College of Art and Design and Chelsea College of Art and Design; Goldsmiths, University of London; the Slade School of Fine Art (part of University College London); the Glasgow School of Art; the Royal College of Art; and The Ruskin School of Drawing and Fine Art (part of the University of Oxford). The Courtauld Institute of Art is a leading centre for the teaching of the history of art. Important art galleries in the United Kingdom include the National Gallery, National Portrait Gallery, Tate Britain and Tate Modern (the most-visited modern art gallery in the world, with around 4.7 million visitors per year).[425]
Cinema
Main article: Cinema of the United Kingdom
Film director Alfred Hitchcock
The United Kingdom has had a considerable influence on the history of the cinema. The British directors Alfred Hitchcock, whose film Vertigo is considered by some critics as the best film of all time,[426] and David Lean are among the most critically acclaimed of all time.[427] Other important directors include Charlie Chaplin,[428] Michael Powell,[429] Carol Reed[430] and Ridley Scott.[431] Many British actors have achieved international fame and critical success, including: Julie Andrews,[432] Richard Burton,[433] Michael Caine,[434] Charlie Chaplin,[435] Sean Connery,[436] Vivien Leigh,[437] David Niven,[438] Laurence Olivier,[439] Peter Sellers,[440] Kate Winslet,[441] and Daniel Day-Lewis, the only person to win an Oscar in the best actor category three times.[442] Some of the most commercially successful films of all time have been produced in the United Kingdom, including the two highest-grossing film franchises (Harry Potter and James Bond).[443] Ealing Studios has a claim to being the oldest continuously working film studio in the world.[444]
Despite a history of important and successful productions, the industry has often been characterised by a debate about its identity and the level of American and European influence. British producers are active in international co-productions and British actors, directors and crew feature regularly in American films. Many successful Hollywood films have been based on British people, stories or events, including Titanic, The Lord of the Rings and Pirates of the Caribbean.
In 2009, British films grossed around $2 billion worldwide and achieved a market share of around 7% globally and 17% in the United Kingdom.[445] UK box-office takings totalled £944 million in 2009, with around 173 million admissions.[445] The British Film Institute has produced a poll ranking of what it considers to be the 100 greatest British films of all time, the BFI Top 100 British films.[446] The annual British Academy Film Awards, hosted by the British Academy of Film and Television Arts, are the British equivalent of the Oscars.[447]
Media
Main article: Media of the United Kingdom
Broadcasting House in London, headquarters of the BBC, the oldest and largest broadcaster in the world.[448][449][450]
The BBC, founded in 1922, is the UK's publicly funded radio, television and Internet broadcasting corporation, and is the oldest and largest broadcaster in the world.[448][449][450] It operates numerous television and radio stations in the UK and abroad and its domestic services are funded by the television licence.[451][452] Other major players in the UK media include ITV plc, which operates 11 of the 15 regional television broadcasters that make up the ITV Network,[453] and News Corporation, which owns a number of national newspapers through News International such as the most popular tabloid The Sun and the longest-established daily "broadsheet" The Times,[454] as well as holding a large stake in satellite broadcaster British Sky Broadcasting.[455] London dominates the media sector in the UK: national newspapers and television and radio are largely based there, although Manchester is also a significant national media centre. Edinburgh and Glasgow, and Cardiff, are important centres of newspaper and broadcasting production in Scotland and Wales respectively.[456] The UK publishing sector, including books, directories and databases, journals, magazines and business media, newspapers and news agencies, has a combined turnover of around £20 billion and employs around 167,000 people.[457]
In 2009, it was estimated that individuals viewed a mean of 3.75 hours of television per day and 2.81 hours of radio. In that year the main BBC public service broadcasting channels accounted for an estimated 28.4% of all television viewing; the three main independent channels accounted for 29.5% and the increasingly important other satellite and digital channels for the remaining 42.1%.[458] Sales of newspapers have fallen since the 1970s and in 2009 42% of people reported reading a daily national newspaper.[459] In 2010 82.5% of the UK population were Internet users, the highest proportion amongst the 20 countries with the largest total number of users in that year.[460]
Philosophy
Main article: British philosophy
The United Kingdom is famous for the tradition of 'British Empiricism', a branch of the philosophy of knowledge that states that only knowledge verified by experience is valid, and 'Scottish Philosophy', sometimes referred to as the 'Scottish School of Common Sense'.[461] The most famous philosophers of British Empiricism are John Locke, George Berkeley and David Hume; while Dugald Stewart, Thomas Reid and William Hamilton were major exponents of the Scottish "common sense" school. Two Britons are also notable for a theory of moral philosophy, utilitarianism, first used by Jeremy Bentham and later by John Stuart Mill in his short work Utilitarianism.[462][463] Other eminent philosophers from the UK and the unions and countries that preceded it include Duns Scotus, John Lilburne, Mary Wollstonecraft, Sir Francis Bacon, Adam Smith, Thomas Hobbes, William of Ockham, Bertrand Russell and A.J. "Freddie" Ayer. Foreign-born philosophers who settled in the UK include Isaiah Berlin, Karl Marx, Karl Popper and Ludwig Wittgenstein.
Sport
Main article: Sport in the United Kingdom
Wembley Stadium, London, home of the England national football team, is one of the most expensive stadia ever built.[464]
Major sports, including association football, tennis, rugby union, rugby league, golf, boxing, rowing and cricket, originated or were substantially developed in the UK and the states that preceded it. With the rules and codes of many modern sports invented and codified in late 19th-century Victorian Britain, in 2012, the President of the IOC, Jacques Rogge, stated: "This great, sports-loving country is widely recognized as the birthplace of modern sport. It was here that the concepts of sportsmanship and fair play were first codified into clear rules and regulations. It was here that sport was included as an educational tool in the school curriculum".[465][466]
In most international competitions, separate teams represent England, Scotland and Wales. Northern Ireland and the Republic of Ireland usually field a single team representing all of Ireland, with notable exceptions being association football and the Commonwealth Games. In sporting contexts, the English, Scottish, Welsh and Irish / Northern Irish teams are often referred to collectively as the Home Nations. There are some sports in which a single team represents the whole of the United Kingdom, including the Olympics, where the UK is represented by the Great Britain team. The 1908, 1948 and 2012 Summer Olympics were held in London, making it the first city to host the games three times. Britain has participated in every modern Olympic Games to date and is third in the medal count.
A 2003 poll found that football is the most popular sport in the United Kingdom.[467] Each of the Home Nations has its own football association, national team and league system. The English top division, the Premier League, is the most watched football league in the world.[468] The first-ever international football match was contested by England and Scotland on 30 November 1872.[469] England, Scotland, Wales and Northern Ireland compete as separate countries in international competitions.[470] A Great Britain Olympic football team was assembled for the first time to compete in the London 2012 Olympic Games. However, the Scottish, Welsh and Northern Irish football associations declined to participate, fearing that it would undermine their independent status – a fear confirmed by FIFA president Sepp Blatter.[471]
The Millennium Stadium, Cardiff, opened for the 1999 Rugby World Cup.
Cricket was invented in England. The England cricket team, controlled by the England and Wales Cricket Board,[472] is the only national team in the UK with Test status. Team members are drawn from the main county sides, and include both English and Welsh players. Cricket is distinct from football and rugby where Wales and England field separate national teams, although Wales had fielded its own team in the past. Irish and Scottish players have played for England because neither Scotland nor Ireland have Test status and have only recently started to play in One Day Internationals.[473][474] Scotland, England (and Wales), and Ireland (including Northern Ireland) have competed at the Cricket World Cup, with England reaching the finals on three occasions. There is a professional league championship in which clubs representing 17 English counties and 1 Welsh county compete.[475]
Rugby league is a popular sport in some regions of the UK. It originated in Huddersfield and is generally played in Northern England.[476] A single 'Great Britain Lions' team had competed in the Rugby League World Cup and Test match games, but this changed in 2008 when England, Scotland and Ireland competed as separate nations.[477] Great Britain is still being retained as the full national team for Ashes tours against Australia, New Zealand and France. Super League is the highest level of professional rugby league in the UK and Europe. It consists of 11 teams from Northern England, 1 from London, 1 from Wales and 1 from France.
In rugby union, England, Scotland, Wales, Ireland, France and Italy compete in the Six Nations Championship, the premier international tournament in the northern hemisphere. Sport governing bodies in England, Scotland, Wales and Ireland organise and regulate the game separately.[478] If any of the British teams or the Irish team beat the other three in a tournament, then it is awarded the Triple Crown.[479]
The Wimbledon Championships, a Grand Slam tennis tournament, is held in Wimbledon, London every June or July.
Thoroughbred racing, which originated under Charles II of England as the "sport of kings", is popular throughout the UK with world-famous races including the Grand National, the Epsom Derby, Royal Ascot and the Cheltenham National Hunt Festival (including the Cheltenham Gold Cup). The UK has proved successful in the international sporting arena in rowing.
The UK is closely associated with motorsport. Many teams and drivers in Formula One (F1) are based in the UK, and the country has won more drivers' and constructors' titles than any other. The UK hosted the very first F1 Grand Prix in 1950 at Silverstone, the current location of the British Grand Prix held each year in July. The country also hosts legs of the Grand Prix motorcycle racing, World Rally Championship and FIA World Endurance Championship. The premier national auto racing event is the British Touring Car Championship (BTCC). Motorcycle road racing has a long tradition with races such as the Isle of Man TT and the North West 200.
Golf is the sixth-most popular sport, by participation, in the UK. Although The Royal and Ancient Golf Club of St Andrews in Scotland is the sport's home course,[480] the world's oldest golf course is actually Musselburgh Links' Old Golf Course.[481]
Snooker is one of the UK's popular sporting exports, with the world championships held annually in Sheffield.[482] The modern game of lawn tennis first originated in the city of Birmingham between 1859 and 1865.[483] The Championships, Wimbledon are international tennis events held in Wimbledon in south London every summer and are regarded as the most prestigious event of the global tennis calendar. In Northern Ireland Gaelic football and hurling are popular team sports, both in terms of participation and spectating, and Irish expatriates in the UK and the US also play them.[484] Shinty (or camanachd) is popular in the Scottish Highlands.[485]
Symbols
Main article: Symbols of the United Kingdom, the Channel Islands and the Isle of Man
The Statue of Britannia in Plymouth. Britannia is a national personification of the UK.
The flag of the United Kingdom is the Union Flag (also referred to as the Union Jack). It was created in 1606 by the superimposition of the Flag of England on the Flag of Scotland and updated in 1801 with the addition of Saint Patrick's Flag. Wales is not represented in the Union Flag, as Wales had been conquered and annexed to England prior to the formation of the United Kingdom. The possibility of redesigning the Union Flag to include representation of Wales has not been completely ruled out.[486] The national anthem of the United Kingdom is "God Save the King", with "King" replaced with "Queen" in the lyrics whenever the monarch is a woman.
Britannia is a national personification of the United Kingdom, originating from Roman Britain.[487] Britannia is symbolised as a young woman with brown or golden hair, wearing a Corinthian helmet and white robes. She holds Poseidon's three-pronged trident and a shield, bearing the Union Flag. Sometimes she is depicted as riding on the back of a lion. Since the height of the British Empire in the late 19th century, Britannia has often been associated with British maritime dominance, as in the patriotic song "Rule, Britannia!". Up until 2008, the lion symbol was depicted behind Britannia on the British fifty pence coin and on the back of the British ten pence coin. It is also used as a symbol on the non-ceremonial flag of the British Army. The bulldog is sometimes used as a symbol of the United Kingdom and has been associated with Winston Churchill's defiance of Nazi Germany.[488]
See also
Outline of the United Kingdom
United Kingdom – Wikipedia book
Walking in the United Kingdom
Flag of the United Kingdom.svg United Kingdom portal Flag of Europe.svg European Union portal Europe green light.png Europe portal
Notes
Jump up ^ The Royal coat of arms used in Scotland:
Royal Coat of Arms of the United Kingdom (Scotland).svg
Jump up ^ There is no authorised version of the national anthem as the words are a matter of tradition; only the first verse is usually sung.[1] No law was passed making "God Save the Queen" the official anthem. In the English tradition, such laws are not necessary; proclamation and usage are sufficient to make it the national anthem. "God Save the Queen" also serves as the Royal anthem for several other countries, namely certain Commonwealth realms.
Jump up ^ Under the Council of Europe's European Charter for Regional or Minority Languages, Scots, Ulster-Scots, Welsh, Cornish, Irish and Scottish Gaelic, are officially recognised as regional or minority languages by the British government for the purposes of the Charter. See also Languages of the United Kingdom.[2]
Jump up ^ Although Northern Ireland is the only part of the UK that shares a land border with another state, two of its Overseas Territories also share land borders with other states. Gibraltar shares a border with Spain, while the Sovereign Base Areas of Akrotiri and Dhekelia share borders with the Republic of Cyprus, Turkish Republic of Northern Cyprus and UN buffer zone separating the two Cypriot polities.
Jump up ^ The Anglo-Irish Treaty was signed on 6 December 1921 to resolve the Irish War of Independence. Effective one year later, it established the Irish Free State as a separate dominion within the Commonwealth. The UK's current name was adopted in 1927 to reflect the change.
Jump up ^ Compare to section 1 of both of the 1800 Acts of Union which reads: the Kingdoms of Great Britain and Ireland shall...be united into one Kingdom, by the Name of "The United Kingdom of Great Britain and Ireland"
Jump up ^ New Zealand, Israel and San Marino are the other countries with uncodified constitutions.
Jump up ^ Since the early twentieth century the prime minister has held the office of First Lord of the Treasury, and in recent decades has also held the office of Minister for the Civil Service.
Jump up ^ Sinn Féin, an Irish republican party, also contests elections in the Republic of Ireland.
Jump up ^ In 2007–2008, this was calculated to be £115 per week for single adults with no dependent children; £199 per week for couples with no dependent children; £195 per week for single adults with two dependent children under 14; and £279 per week for couples with two dependent children under 14.
References
Jump up ^ National Anthem, British Monarchy official website. Retrieved 16 November 2013.
^ Jump up to: a b c "List of declarations made with respect to treaty No. 148". Council of Europe. Retrieved 12 December 2013.
^ Jump up to: a b "Population Estimates for UK, England and Wales, Scotland and Northern Ireland, Mid-2013". Office for National Statistics. Retrieved 26 June 2014.
Jump up ^ "2011 UK censuses". Office for National Statistics. Retrieved 17 December 2012.
^ Jump up to: a b c d "United Kingdom". International Monetary Fund. Retrieved 1 November 2014.
Jump up ^ "Gini coefficient of equivalised disposable income (source: SILC)". Eurostat Data Explorer. Retrieved 13 August 2013.
Jump up ^ "2014 Human Development Report". 14 March 2013. pp. 22–25. Retrieved 27 July 2014.
Jump up ^ "Definition of Great Britain in English". Oxford University Press. Retrieved 29 October 2014. Great Britain is the name for the island that comprises England, Scotland, and Wales, although the term is also used loosely to refer to the United Kingdom.
Jump up ^ The British Monarchy, What is constitutional monarchy?. Retrieved 17 July 2013
Jump up ^ CIA, The World Factbook. Retrieved 17 July 2013
Jump up ^ "The World Factbook". Central Intelligence Agency. 1 February 2014. Retrieved 23 February 2014.
^ Jump up to: a b "Countries within a country". Prime Minister's Office. 10 January 2003.
^ Jump up to: a b "Devolution of powers to Scotland, Wales, and Northern Ireland". United Kingdom Government. Retrieved 17 April 2013. In a similar way to how the government is formed from members from the two Houses of Parliament, members of the devolved legislatures nominate ministers from among themselves to comprise an executive, known as the devolved administrations...
Jump up ^ "Fall in UK university students". BBC News. 29 January 2009.
Jump up ^ "Country Overviews: United Kingdom". Transport Research Knowledge Centre. Retrieved 28 March 2010.
Jump up ^ "Key facts about the United Kingdom". Directgov. Retrieved 3 May 2011. The full title of this country is 'the United Kingdom of Great Britain and Northern Ireland'. 'The UK' is made up of England, Scotland, Wales and Northern Ireland. 'Britain' is used informally, usually meaning the United Kingdom. 'Great Britain' is made up of England, Scotland and Wales. The Channel Islands and the Isle of Man are not part of the UK.[dead link]
Jump up ^ "Working with Overseas Territories". Foreign and Commonwealth Office. Retrieved 3 May 2011.
Jump up ^ Mathias, P. (2001). The First Industrial Nation: the Economic History of Britain, 1700–1914. London: Routledge. ISBN 0-415-26672-6.
Jump up ^ Ferguson, Niall (2004). Empire: The rise and demise of the British world order and the lessons for global power. New York: Basic Books. ISBN 0-465-02328-2.
Jump up ^ Sheridan, Greg (15 May 2010). "Cameron has chance to make UK great again". The Australian (Sydney). Retrieved 23 May 2011.
Jump up ^ Dugan, Emily (18 November 2012). "Britain is now most powerful nation on earth". The Independent (London). Retrieved 18 November 2012.
^ Jump up to: a b "The 15 countries with the highest military expenditure in 2013 (table)" (PDF). Stockholm International Peace Research Institute. Retrieved 4 May 2014.
^ Jump up to: a b The Military Balance 2014: Top 15 Defence Budgets 2013 (IISS)
Jump up ^ "Treaty of Union, 1706". Scots History Online. Retrieved 23 August 2011.
Jump up ^ Barnett, Hilaire; Jago, Robert (2011). Constitutional & Administrative Law (8th ed.). Abingdon: Routledge. p. 165. ISBN 978-0-415-56301-7.
Jump up ^ Gascoigne, Bamber. "History of Great Britain (from 1707)". History World. Retrieved 18 July 2011.
Jump up ^ Cottrell, P. (2008). The Irish Civil War 1922–23. p. 85. ISBN 1-84603-270-9.
^ Jump up to: a b S. Dunn; H. Dawson (2000), An Alphabetical Listing of Word, Name and Place in Northern Ireland and the Living Language of Conflict, Lampeter: Edwin Mellen Press, One specific problem - in both general and particular senses - is to know what to call Northern Ireland itself: in the general sense, it is not a country, or a province, or a state - although some refer to it contemptuously as a statelet: the least controversial word appears to be jurisdiction, but this might change.
Jump up ^ "Changes in the list of subdivision names and code elements". ISO 3166-2. International Organization for Standardization. 15 December 2011. Retrieved 28 May 2012.
Jump up ^ Population Trends, Issues 75–82, p.38, 1994, UK Office of Population Censuses and Surveys
Jump up ^ Life in the United Kingdom: a journey to citizenship, p. 7, United Kingdom Home Office, 2007, ISBN 978-0-11-341313-3.
Jump up ^ "Statistical bulletin: Regional Labour Market Statistics". Retrieved 5 March 2014.
Jump up ^ "13.4% Fall In Earnings Value During Recession". Retrieved 5 March 2014.
Jump up ^ Murphy, Dervla (1979). A Place Apart. London: Penguin. ISBN 978-0-14-005030-1.
Jump up ^ Whyte, John; FitzGerald, Garret (1991). Interpreting Northern Ireland. Oxford: Clarendon Press. ISBN 978-0-19-827380-6.
Jump up ^ "Guardian Unlimited Style Guide". London: Guardian News and Media Limited. 19 December 2008. Retrieved 23 August 2011.
Jump up ^ "BBC style guide (Great Britain)". BBC News. 19 August 2002. Retrieved 23 August 2011.
Jump up ^ "Key facts about the United Kingdom". Government, citizens and rights. HM Government. Retrieved 24 August 2011.[dead link]
Jump up ^ "Merriam-Webster Dictionary Online Definition of ''Great Britain''". Merriam Webster. 31 August 2012. Retrieved 9 April 2013.
Jump up ^ New Oxford American Dictionary: "Great Britain: England, Wales, and Scotland considered as a unit. The name is also often used loosely to refer to the United Kingdom."
Jump up ^ "Great Britain". International Olympic Committee. Retrieved 10 May 2011.
Jump up ^ "Team GB – Our Greatest Team". British Olympic Association. Retrieved 10 May 2011.[dead link]
Jump up ^ Bradley, Anthony Wilfred; Ewing, Keith D. (2007). Constitutional and administrative law 1 (14th ed.). Harlow: Pearson Longman. p. 36. ISBN 978-1-4058-1207-8.
Jump up ^ "Which of these best describes the way you think of yourself?". Northern Ireland Life and Times Survey 2010. ARK – Access Research Knowledge. 2010. Retrieved 1 July 2010.
Jump up ^ Schrijver, Frans (2006). Regionalism after regionalisation: Spain, France and the United Kingdom. Amsterdam University Press. pp. 275–277. ISBN 978-90-5629-428-1.
Jump up ^ Jack, Ian (11 December 2010). "Why I'm saddened by Scotland going Gaelic". The Guardian (London).
Jump up ^ Ffeithiau allweddol am y Deyrnas Unedig : Directgov – Llywodraeth, dinasyddion a hawliau[dead link]
Jump up ^ "Ancient skeleton was 'even older'". BBC News. 30 October 2007. Retrieved 27 April 2011.
Jump up ^ Koch, John T. (2006). Celtic culture: A historical encyclopedia. Santa Barbara, CA: ABC-CLIO. p. 973. ISBN 978-1-85109-440-0.
Jump up ^ Davies, John; Jenkins, Nigel; Baines, Menna; Lynch, Peredur I., eds. (2008). The Welsh Academy Encyclopaedia of Wales. Cardiff: University of Wales Press. p. 915. ISBN 978-0-7083-1953-6.
Jump up ^ "Short Athelstan biography". BBC History. Retrieved 9 April 2013.
Jump up ^ Mackie, J.D. (1991). A History of Scotland. London: Penguin. pp. 18–19. ISBN 978-0-14-013649-4.
Jump up ^ Campbell, Ewan (1999). Saints and Sea-kings: The First Kingdom of the Scots. Edinburgh: Canongate. pp. 8–15. ISBN 0-86241-874-7.
Jump up ^ Haigh, Christopher (1990). The Cambridge Historical Encyclopedia of Great Britain and Ireland. Cambridge University Press. p. 30. ISBN 978-0-521-39552-6.
Jump up ^ Ganshof, F.L. (1996). Feudalism. University of Toronto. p. 165. ISBN 978-0-8020-7158-3.
Jump up ^ Chibnall, Marjorie (1999). The debate on the Norman Conquest. Manchester University Press. pp. 115–122. ISBN 978-0-7190-4913-2.
Jump up ^ Keen, Maurice. "The Hundred Years War". BBC History.
Jump up ^ The Reformation in England and Scotland and Ireland: The Reformation Period & Ireland under Elizabeth I, Encyclopædia Britannica Online.
Jump up ^ "British History in Depth – Wales under the Tudors". BBC History. 5 November 2009. Retrieved 21 September 2010.
Jump up ^ Nicholls, Mark (1999). A history of the modern British Isles, 1529–1603: The two kingdoms. Oxford: Blackwell. pp. 171–172. ISBN 978-0-631-19334-0.
Jump up ^ Canny, Nicholas P. (2003). Making Ireland British, 1580–1650. Oxford University Press. pp. 189–200. ISBN 978-0-19-925905-2.
Jump up ^ Ross, D. (2002). Chronology of Scottish History. Glasgow: Geddes & Grosset. p. 56. ISBN 1-85534-380-0
Jump up ^ Hearn, J. (2002). Claiming Scotland: National Identity and Liberal Culture. Edinburgh University Press. p. 104. ISBN 1-902930-16-9
Jump up ^ "English Civil Wars". Encyclopaedia Britannica. Retrieved 28 April 2013.
Jump up ^ "Scotland and the Commonwealth: 1651–1660". Archontology.org. 14 March 2010. Retrieved 20 April 2010.
Jump up ^ Lodge, Richard (2007) [1910]. The History of England – From the Restoration to the Death of William III (1660–1702). Read Books. p. 8. ISBN 978-1-4067-0897-4.
Jump up ^ "Tudor Period and the Birth of a Regular Navy". Royal Navy History. Institute of Naval History. Retrieved 24 December 2010.[dead link]
Jump up ^ Canny, Nicholas (1998). The Origins of Empire, The Oxford History of the British Empire Volume I. Oxford University Press. ISBN 0-19-924676-9.
Jump up ^ "Articles of Union with Scotland 1707". UK Parliament. Retrieved 19 October 2008.
Jump up ^ "Acts of Union 1707". UK Parliament. Retrieved 6 January 2011.
Jump up ^ "Treaty (act) of Union 1706". Scottish History online. Retrieved 3 February 2011.
Jump up ^ Library of Congress, The Impact of the American Revolution Abroad, p. 73.
Jump up ^ Loosemore, Jo (2007). Sailing against slavery. BBC Devon. 2007.
Jump up ^ "The Act of Union". Act of Union Virtual Library. Retrieved 15 May 2006.
Jump up ^ Tellier, L.-N. (2009). Urban World History: an Economic and Geographical Perspective. Quebec: PUQ. p. 463. ISBN 2-7605-1588-5.
Jump up ^ Sondhaus, L. (2004). Navies in Modern World History. London: Reaktion Books. p. 9. ISBN 1-86189-202-0.
Jump up ^ Porter, Andrew (1998). The Nineteenth Century, The Oxford History of the British Empire Volume III. Oxford University Press. p. 332. ISBN 0-19-924678-5.
Jump up ^ "The Workshop of the World". BBC History. Retrieved 28 April 2013.
Jump up ^ Porter, Andrew (1998). The Nineteenth Century, The Oxford History of the British Empire Volume III. Oxford University Press. p. 8. ISBN 0-19-924678-5.
Jump up ^ Marshall, P.J. (1996). The Cambridge Illustrated History of the British Empire. Cambridge University Press. pp. 156–57. ISBN 0-521-00254-0.
Jump up ^ Tompson, Richard S. (2003). Great Britain: a reference guide from the Renaissance to the present. New York: Facts on File. p. 63. ISBN 978-0-8160-4474-0.
Jump up ^ Hosch, William L. (2009). World War I: People, Politics, and Power. America at War. New York: Britannica Educational Publishing. p. 21. ISBN 978-1-61530-048-8.
Jump up ^ Turner, John (1988). Britain and the First World War. London: Unwin Hyman. pp. 22–35. ISBN 978-0-04-445109-9.
^ Jump up to: a b Westwell, I.; Cove, D. (eds) (2002). History of World War I, Volume 3. London: Marshall Cavendish. pp. 698 and 705. ISBN 0-7614-7231-2.
Jump up ^ Turner, J. (1988). Britain and the First World War. Abingdon: Routledge. p. 41. ISBN 0-04-445109-1.
Jump up ^ SR&O 1921, No. 533 of 3 May 1921.
Jump up ^ "The Anglo-Irish Treaty, 6 December 1921". CAIN. Retrieved 15 May 2006.
Jump up ^ Rubinstein, W. D. (2004). Capitalism, Culture, and Decline in Britain, 1750–1990. Abingdon: Routledge. p. 11. ISBN 0-415-03719-0.
Jump up ^ "Britain to make its final payment on World War II loan from U.S.". The New York Times. 28 December 2006. Retrieved 25 August 2011.
Jump up ^ Francis, Martin (1997). Ideas and policies under Labour, 1945–1951: Building a new Britain. Manchester University Press. pp. 225–233. ISBN 978-0-7190-4833-3.
Jump up ^ Lee, Stephen J. (1996). Aspects of British political history, 1914–1995. London; New York: Routledge. pp. 173–199. ISBN 978-0-415-13103-2.
Jump up ^ Larres, Klaus (2009). A companion to Europe since 1945. Chichester: Wiley-Blackwell. p. 118. ISBN 978-1-4051-0612-2.
Jump up ^ "Country List". Commonwealth Secretariat. 19 March 2009. Retrieved 11 September 2012.[dead link]
Jump up ^ Julios, Christina (2008). Contemporary British identity: English language, migrants, and public discourse. Studies in migration and diaspora. Aldershot: Ashgate. p. 84. ISBN 978-0-7546-7158-9.
Jump up ^ Aughey, Arthur (2005). The Politics of Northern Ireland: Beyond the Belfast Agreement. London: Routledge. p. 7. ISBN 978-0-415-32788-6.
Jump up ^ "The troubles were over, but the killing continued. Some of the heirs to Ireland's violent traditions refused to give up their inheritance." Holland, Jack (1999). Hope against History: The Course of Conflict in Northern Ireland. New York: Henry Holt. p. 221. ISBN 978-0-8050-6087-4.
Jump up ^ Elliot, Marianne (2007). The Long Road to Peace in Northern Ireland: Peace Lectures from the Institute of Irish Studies at Liverpool University. University of Liverpool Institute of Irish Studies, Liverpool University Press. p. 2. ISBN 1-84631-065-2.
Jump up ^ Dorey, Peter (1995). British politics since 1945. Making contemporary Britain. Oxford: Blackwell. pp. 164–223. ISBN 978-0-631-19075-2.
Jump up ^ Griffiths, Alan; Wall, Stuart (2007). Applied Economics (11th ed.). Harlow: Financial Times Press. p. 6. ISBN 978-0-273-70822-3. Retrieved 26 December 2010.
Jump up ^ Keating, Michael (1 January 1998). "Reforging the Union: Devolution and Constitutional Change in the United Kingdom". Publius: the Journal of Federalism 28 (1): 217. doi:10.1093/oxfordjournals.pubjof.a029948. Retrieved 4 February 2009.
Jump up ^ Jackson, Mike (3 April 2011). "Military action alone will not save Libya". Financial Times (London).
Jump up ^ "United Kingdom country profile". BBC. 24 January 2013. Retrieved 9 April 2013.
Jump up ^ "Scotland to hold independence poll in 2014 – Salmond". BBC News. 10 January 2012. Retrieved 10 January 2012.
Jump up ^ Oxford English Dictionary: "British Isles: a geographical term for the islands comprising Great Britain and Ireland with all their offshore islands including the Isle of Man and the Channel Islands."
^ Jump up to: a b c d e f "United Kingdom". The World Factbook. Central Intelligence Agency. Retrieved 23 September 2008.
^ Jump up to: a b c d e Latimer Clarke Corporation Pty Ltd. "United Kingdom – Atlapedia Online". Atlapedia.com. Retrieved 26 October 2010.
Jump up ^ ROG Learning Team (23 August 2002). "The Prime Meridian at Greenwich". Royal Museums Greenwich. Royal Museums Greenwich. Retrieved 11 September 2012.
Jump up ^ Neal, Clare. "How long is the UK coastline?". British Cartographic Society. Retrieved 26 October 2010.
Jump up ^ "The Channel Tunnel". Eurotunnel. Retrieved 29 November 2010.[dead link]
Jump up ^ "England – Profile". BBC News. 11 February 2010.
Jump up ^ "Scotland Facts". Scotland Online Gateway. Archived from the original on 21 June 2008. Retrieved 16 July 2008.
Jump up ^ Winter, Jon (19 May 2001). "The complete guide to Scottish Islands". The Independent (London).
Jump up ^ "Overview of Highland Boundary Fault". Gazetteer for Scotland. University of Edinburgh. Retrieved 27 December 2010.
Jump up ^ "Ben Nevis Weather". Ben Nevis Weather. Retrieved 26 October 2008.
Jump up ^ "Profile: Wales". BBC News. 9 June 2010. Retrieved 7 November 2010.
Jump up ^ Giles Darkes (26 April 2014). "How long is the UK coastline?". The British Cartographic Society.
Jump up ^ "Geography of Northern Ireland". University of Ulster. Retrieved 22 May 2006.
Jump up ^ "UK climate summaries". Met Office. Retrieved 1 May 2011.
Jump up ^ United Nations Economic and Social Council (August 2007). "Ninth UN Conference on the standardization of Geographical Names". UN Statistics Division. Archived from the original on 1 December 2009. Retrieved 21 October 2008.
Jump up ^ Barlow, I.M. (1991). Metropolitan Government. London: Routledge. ISBN 978-0-415-02099-2.
Jump up ^ "Welcome to the national site of the Government Office Network". Government Offices. Archived from the original on 15 June 2009. Retrieved 3 July 2008.
Jump up ^ "A short history of London government". Greater London Authority. Archived from the original on 21 April 2008. Retrieved 4 October 2008.
Jump up ^ Sherman, Jill; Norfolk, Andrew (5 November 2004). "Prescott's dream in tatters as North East rejects assembly". The Times (London). Retrieved 15 February 2008. The Government is now expected to tear up its twelve-year-old plan to create eight or nine regional assemblies in England to mirror devolution in Scotland and Wales. (subscription required)
Jump up ^ "Local Authority Elections". Local Government Association. Retrieved 3 October 2008.[dead link]
Jump up ^ "STV in Scotland: Local Government Elections 2007". Political Studies Association. Archived from the original on 20 March 2011. Retrieved 2 August 2008.
Jump up ^ Ethical Standards in Public Life framework: "Ethical Standards in Public Life". The Scottish Government. Retrieved 3 October 2008.
Jump up ^ "Who we are". Convention of Scottish Local Authorities. Retrieved 5 July 2011.
Jump up ^ "Local Authorities". The Welsh Assembly Government. Retrieved 31 July 2008.
Jump up ^ "Local government elections in Wales". The Electoral Commission. 2008. Retrieved 8 April 2011.
Jump up ^ "Welsh Local Government Association". Welsh Local Government Association. Retrieved 20 March 2008.
Jump up ^ Devenport, Mark (18 November 2005). "NI local government set for shake-up". BBC News. Retrieved 15 November 2008.
Jump up ^ "Foster announces the future shape of local government" (Press release). Northern Ireland Executive. 13 March 2008. Retrieved 20 October 2008.
Jump up ^ "Local Government elections to be aligned with review of public administration" (Press release). Northern Ireland Office. 25 April 2008. Retrieved 2 August 2008.[dead link]
Jump up ^ "CIBC PWM Global – Introduction to The Cayman Islands". Cibc.com. 11 July 2012. Retrieved 17 August 2012.
Jump up ^ Rappeport, Laurie. "Cayman Islands Tourism". Washington DC: USA Today Travel Tips. Retrieved 9 April 2013.
Jump up ^ "Working with Overseas Territories". Foreign & Commonwealth Office. 6 October 2010. Retrieved 5 November 2010.
Jump up ^ http://www.justice.gov.uk/downloads/about/moj/our-responsibilities/Background_Briefing_on_the_Crown_Dependencies2.pdf
Jump up ^ "Overseas Territories". Foreign & Commonwealth Office. Retrieved 6 September 2010.
Jump up ^ "The World Factbook". CIA. Retrieved 26 December 2010.
Jump up ^ "Country profiles". Foreign & Commonwealth Office. 21 February 2008. Retrieved 6 September 2010.[dead link]
Jump up ^ Davison, Phil (18 August 1995). "Bermudians vote to stay British". The Independent (London). Retrieved 11 September 2012.
Jump up ^ The Committee Office, House of Commons. "House of Commons – Crown Dependencies – Justice Committee". Publications.parliament.uk. Retrieved 7 November 2010.
Jump up ^ Fact sheet on the UK's relationship with the Crown Dependencies – gov.uk, Ministry of Justice. Retrieved 25 August 2014.
Jump up ^ "Profile of Jersey". States of Jersey. Retrieved 31 July 2008. The legislature passes primary legislation, which requires approval by The Queen in Council, and enacts subordinate legislation in many areas without any requirement for Royal Sanction and under powers conferred by primary legislation.
Jump up ^ "Chief Minister to meet Channel Islands counterparts – Isle of Man Public Services" (Press release). Isle of Man Government. 29 May 2012. Retrieved 9 April 2013.[dead link]
Jump up ^ Bagehot, Walter (1867). The English Constitution. London: Chapman and Hall. p. 103.
Jump up ^ Carter, Sarah. "A Guide To the UK Legal System". University of Kent at Canterbury. Retrieved 16 May 2006.
Jump up ^ "Parliamentary sovereignty". UK Parliament. n.d. Archived from the original on 27 May 2012.
Jump up ^ "The Government, Prime Minister and Cabinet". Public services all in one place. Directgov. Retrieved 12 February 2010.
Jump up ^ "Brown is UK's new prime minister". BBC News. 27 June 2007. Retrieved 23 January 2008.
Jump up ^ "David Cameron is UK's new prime minister". BBC News. 11 May 2010. Retrieved 11 May 2010.
Jump up ^ November 2010 "Elections and voting". UK Parliament. Archived from the original on 14 November 2010. Retrieved 14 November 2010.
Jump up ^ November 2010 "The Parliament Acts". UK Parliament. Archived from the original on 14 November 2010.
Jump up ^ "United Kingdom". European Election Database. Norwegian Social Science Data Services. Retrieved 3 July 2010.
Jump up ^ Wainwright, Martin (28 May 2010). "Thirsk and Malton: Conservatives take final seat in parliament". The Guardian (London). Retrieved 3 July 2010.
Jump up ^ "Scots MPs attacked over fees vote". BBC News. 27 January 2004. Retrieved 21 October 2008.
Jump up ^ Taylor, Brian (1 June 1998). "Talking Politics: The West Lothian Question". BBC News. Retrieved 21 October 2008.
Jump up ^ "England-only laws 'need majority from English MPs'". BBC News. 25 March 2013. Retrieved 28 April 2013.
Jump up ^ "Scotland's Parliament – powers and structures". BBC News. 8 April 1999. Retrieved 21 October 2008.
Jump up ^ "Salmond elected as first minister". BBC News. 16 May 2007. Retrieved 21 October 2008.
Jump up ^ "Scottish election: SNP wins election". BBC News. 6 May 2011.
Jump up ^ "Structure and powers of the Assembly". BBC News. 9 April 1999. Retrieved 21 October 2008.
Jump up ^ "Carwyn Jones clinches leadership in Wales". WalesOnline (Media Wales). 1 December 2009. Retrieved 1 December 2009.
Jump up ^ "Devolved Government – Ministers and their departments". Northern Ireland Executive. Archived from the original on 22 August 2007.
Jump up ^ Burrows, N. (1999). "Unfinished Business: The Scotland Act 1998". The Modern Law Review 62 (2): 241–60 [p. 249]. doi:10.1111/1468-2230.00203. The UK Parliament is sovereign and the Scottish Parliament is subordinate. The White Paper had indicated that this was to be the approach taken in the legislation. The Scottish Parliament is not to be seen as a reflection of the settled will of the people of Scotland or of popular sovereignty but as a reflection of its subordination to a higher legal authority. Following the logic of this argument, the power of the Scottish Parliament to legislate can be withdrawn or overridden...
Jump up ^ Elliot, M. (2004). "United Kingdom: Parliamentary sovereignty under pressure". International Journal of Constitutional Law 2 (3): 545–627 [pp. 553–554]. doi:10.1093/icon/2.3.545. Notwithstanding substantial differences among the schemes, an important common factor is that the U.K. Parliament has not renounced legislative sovereignty in relation to the three nations concerned. For example, the Scottish Parliament is empowered to enact primary legislation on all matters, save those in relation to which competence is explicitly denied ... but this power to legislate on what may be termed "devolved matters" is concurrent with the Westminster Parliament's general power to legislate for Scotland on any matter at all, including devolved matters ... In theory, therefore, Westminster may legislate on Scottish devolved matters whenever it chooses...
Jump up ^ Walker, G. (2010). "Scotland, Northern Ireland, and Devolution, 1945–1979". Journal of British Studies 39 (1): 124 & 133. doi:10.1086/644536.
Jump up ^ Gamble, A. "The Constitutional Revolution in the United Kingdom". Publius 36 (1): 19–35 [p. 29]. doi:10.1093/publius/pjj011. The British parliament has the power to abolish the Scottish parliament and the Welsh assembly by a simple majority vote in both houses, but since both were sanctioned by referenda, it would be politically difficult to abolish them without the sanction of a further vote by the people. In this way several of the constitutional measures introduced by the Blair government appear to be entrenched and not subject to a simple exercise of parliamentary sovereignty at Westminster.
Jump up ^ Meehan, E. (1999). "The Belfast Agreement—Its Distinctiveness and Points of Cross-Fertilization in the UK's Devolution Programme". Parliamentary Affairs 52 (1): 19–31 [p. 23]. doi:10.1093/pa/52.1.19. [T]he distinctive involvement of two governments in the Northern Irish problem means that Northern Ireland's new arrangements rest upon an intergovernmental agreement. If this can be equated with a treaty, it could be argued that the forthcoming distribution of power between Westminster and Belfast has similarities with divisions specified in the written constitutions of federal states... Although the Agreement makes the general proviso that Westminster's 'powers to make legislation for Northern Ireland' remains 'unaffected', without an explicit categorical reference to reserved matters, it may be more difficult than in Scotland or Wales for devolved powers to be repatriated. The retraction of devolved powers would not merely entail consultation in Northern Ireland backed implicitly by the absolute power of parliamentary sovereignty but also the renegotiation of an intergovernmental agreement.
Jump up ^ "The Treaty (act) of the Union of Parliament 1706". Scottish History Online. Retrieved 5 October 2008.
Jump up ^ "UK Supreme Court judges sworn in". BBC News. 1 October 2009.
Jump up ^ "Constitutional reform: A Supreme Court for the United Kingdom". Department for Constitutional Affairs. July 2003. Retrieved 13 May 2013.
Jump up ^ "Role of the JCPC". Judicial Committee of the Privy Council. Retrieved 28 April 2013.
Jump up ^ Bainham, Andrew (1998). The international survey of family law: 1996. The Hague: Martinus Nijhoff. p. 298. ISBN 978-90-411-0573-8.
Jump up ^ Adeleye, Gabriel; Acquah-Dadzie, Kofi; Sienkewicz, Thomas; McDonough, James (1999). World dictionary of foreign expressions. Wauconda, IL: Bolchazy-Carducci. p. 371. ISBN 978-0-86516-423-9.
Jump up ^ "The Australian courts and comparative law". Australian Law Postgraduate Network. Retrieved 28 December 2010.
Jump up ^ "Court of Session – Introduction". Scottish Courts. Retrieved 5 October 2008.[dead link]
Jump up ^ "High Court of Justiciary – Introduction". Scottish Courts. Retrieved 5 October 2008.[dead link]
Jump up ^ "House of Lords – Practice Directions on Permission to Appeal". UK Parliament. Retrieved 22 June 2009.
Jump up ^ "Introduction". Scottish Courts. Retrieved 5 October 2008.[dead link]
Jump up ^ Samuel Bray (2005). "Not proven: introducing a third verdict". The University of Chicago Law Review 72 (4): 1299. Retrieved 30 November 2013.
Jump up ^ "Police-recorded crime down by 9%". BBC News. 17 July 2008. Retrieved 21 October 2008.
Jump up ^ "New record high prison population". BBC News. 8 February 2008. Retrieved 21 October 2008.
Jump up ^ "Crime falls to 32 year low" (Press release). Scottish Government. 7 September 2010. Retrieved 21 April 2011.
Jump up ^ "Prisoner Population at Friday 22 August 2008". Scottish Prison Service. Retrieved 28 August 2008.
Jump up ^ "Scots jail numbers at record high". BBC News. 29 August 2008. Retrieved 21 October 2008.
Jump up ^ Swaine, Jon (13 January 2009). "Barack Obama presidency will strengthen special relationship, says Gordon Brown". The Daily Telegraph (London). Retrieved 3 May 2011.
Jump up ^ Kirchner, E. J.; Sperling, J. (2007). Global Security Governance: Competing Perceptions of Security in the 21st Century. London: Taylor & Francis. p. 100. ISBN 0-415-39162-8
Jump up ^ The Committee Office, House of Commons (19 February 2009). "DFID's expenditure on development assistance". UK Parliament. Retrieved 28 April 2013.
Jump up ^ "Ministry of Defence". Ministry of Defence. Retrieved 21 February 2012.
Jump up ^ "Speaker addresses Her Majesty Queen Elizabeth II". UK Parliament. 30 March 2012. Retrieved 28 April 2013.
Jump up ^ "House of Commons Hansard". UK Parliament. Retrieved 23 October 2008.
Jump up ^ UK 2005: The Official Yearbook of the United Kingdom of Great Britain and Northern Ireland. Office for National Statistics. p. 89.
Jump up ^ "Principles for Economic Regulation". Department for Business, Innovation & Skills. April 2011. Retrieved 1 May 2011.
Jump up ^ "United Kingdom". International Monetary Fund. Retrieved 1 October 2009.
Jump up ^ Chavez-Dreyfuss, Gertrude (1 April 2008). "Global reserves, dollar share up at end of 2007-IMF". Reuters. Retrieved 21 December 2009.
Jump up ^ "More About the Bank". Bank of England. n.d. Archived from the original on 12 March 2008.
Jump up ^ "Index of Services (experimental)". Office for National Statistics. 7 May 2006. Archived from the original on 7 May 2006.
Jump up ^ Sassen, Saskia (2001). The Global City: New York, London, Tokyo (2nd ed.). Princeton University Press. ISBN 0-691-07866-1.
^ Jump up to: a b "Global Financial Centres 7". Z/Yen. 2010. Retrieved 21 April 2010.
^ Jump up to: a b "Worldwide Centres of Commerce Index 2008". Mastercard. Retrieved 5 July 2011.
^ Jump up to: a b Zumbrun, Joshua (15 July 2008). "World's Most Economically Powerful Cities". Forbes (New York). Archived from the original on 19 May 2011. Retrieved 3 October 2010.
Jump up ^ "Global city GDP rankings 2008–2025". PricewaterhouseCoopers. Archived from the original on 19 May 2011. Retrieved 16 November 2010.
Jump up ^ Lazarowicz, Mark (Labour MP) (30 April 2003). "Financial Services Industry". UK Parliament. Retrieved 17 October 2008.
Jump up ^ International Tourism Receipts[dead link]. UNWTO Tourism Highlights, Edition 2005. page 12. World Tourism Organisation. Retrieved 24 May 2006.
Jump up ^ Bremner, Caroline (10 January 2010). "Euromonitor International's Top City Destination Ranking". Euromonitor International. Archived from the original on 19 May 2011. Retrieved 31 May 2011.
Jump up ^ "From the Margins to the Mainstream – Government unveils new action plan for the creative industries". DCMS. 9 March 2007. Retrieved 9 March 2007.[dead link]
^ Jump up to: a b "European Countries – United Kingdom". Europa (web portal). Retrieved 15 December 2010.
Jump up ^ Harrington, James W.; Warf, Barney (1995). Industrial location: Principles, practices, and policy. London: Routledge. p. 121. ISBN 978-0-415-10479-1.
Jump up ^ Spielvogel, Jackson J. (2008). Western Civilization: Alternative Volume: Since 1300. Belmont, CA: Thomson Wadsworth. ISBN 978-0-495-55528-5.
Jump up ^ Hewitt, Patricia (15 July 2004). "TUC Manufacturing Conference". Department of Trade and Industry. Retrieved 16 May 2006.
Jump up ^ "Industry topics". Society of Motor Manufacturers and Traders. 2011. Retrieved 5 July 2011.
Jump up ^ Robertson, David (9 January 2009). "The Aerospace industry has thousands of jobs in peril". The Times (London). Retrieved 9 June 2011. (subscription required)
Jump up ^ "Facts & Figures – 2009". Aerospace & Defence Association of Europe. Retrieved 9 June 2011.[dead link]
Jump up ^ "UK Aerospace Industry Survey – 2010". ADS Group. Retrieved 9 June 2011.
^ Jump up to: a b c d http://www.theengineer.co.uk/aerospace/in-depth/reasons-to-be-cheerful-about-the-uk-aerospace-sector/1017274.article
Jump up ^ "The Pharmaceutical sector in the UK". Department for Business, Innovation & Skills. Retrieved 9 June 2011.
Jump up ^ "Ministerial Industry Strategy Group – Pharmaceutical Industry: Competitiveness and Performance Indicators". Department of Health. Retrieved 9 June 2011.[dead link]
Jump up ^ [1][dead link]
Jump up ^ "UK in recession as economy slides". BBC News. 23 January 2009. Retrieved 23 January 2009.
Jump up ^ "UK youth unemployment at its highest in two decades: 22.5%". MercoPress. 15 April 2012.
Jump up ^ Groom, Brian (19 January 2011). "UK youth unemployment reaches record". Financial Times (London).
Jump up ^ "Release: EU Government Debt and Deficit returns". Office for National Statistics. March 2012. Retrieved 17 August 2012.
Jump up ^ "UK loses top AAA credit rating for first time since 1978". BBC News. 23 February 2013. Retrieved 23 February 2013.
Jump up ^ "Britain sees real wages fall 3.2%". Daily Express (London). 2 March 2013.
Jump up ^ Beckford, Martin (5 December 2011). "Gap between rich and poor growing fastest in Britain". The Daily Telegraph (London).
Jump up ^ "United Kingdom: Numbers in low income". The Poverty Site. Retrieved 25 September 2009.
Jump up ^ "United Kingdom: Children in low income households". The Poverty Site. Retrieved 25 September 2009.
Jump up ^ "Warning of food price hike crisis". BBC News. 4 April 2009.
Jump up ^ Andrews, J. (16 January 2013). "How poor is Britain now". Yahoo! Finance UK
Jump up ^ Glynn, S.; Booth, A. (1996). Modern Britain: An Economic and Social History. London: Routledge.
Jump up ^ "Report highlights 'bleak' poverty levels in the UK" Phys.org, 29 March 2013
Jump up ^ Gascoin, J. "A reappraisal of the role of the universities in the Scientific Revolution", in Lindberg, David C. and Westman, Robert S., eds (1990), Reappraisals of the Scientific Revolution. Cambridge University Press. p. 248. ISBN 0-521-34804-8.
Jump up ^ Reynolds, E.E.; Brasher, N.H. (1966). Britain in the Twentieth Century, 1900–1964. Cambridge University Press. p. 336. OCLC 474197910
Jump up ^ Burtt, E.A. (2003) [1924]. The Metaphysical Foundations of Modern Science. Mineola, NY: Courier Dover. p. 207. ISBN 0-486-42551-7.
Jump up ^ Hatt, C. (2006). Scientists and Their Discoveries. London: Evans Brothers. pp. 16, 30 and 46. ISBN 0-237-53195-X.
Jump up ^ Jungnickel, C.; McCormmach, R. (1996). Cavendish. American Philosophical Society. ISBN 0-87169-220-1.
Jump up ^ "The Nobel Prize in Physiology or Medicine 1945: Sir Alexander Fleming, Ernst B. Chain, Sir Howard Florey". The Nobel Foundation. Archived from the original on 21 June 2011.
Jump up ^ Hatt, C. (2006). Scientists and Their Discoveries. London: Evans Brothers. p. 56. ISBN 0-237-53195-X.
Jump up ^ James, I. (2010). Remarkable Engineers: From Riquet to Shannon. Cambridge University Press. pp. 33–6. ISBN 0-521-73165-8.
Jump up ^ Bova, Ben (2002) [1932]. The Story of Light. Naperville, IL: Sourcebooks. p. 238. ISBN 978-1-4022-0009-0.
Jump up ^ "Alexander Graham Bell (1847–1922)". Scottish Science Hall of Fame. Archived from the original on 21 June 2011.
Jump up ^ "John Logie Baird (1888–1946)". BBC History. Archived from the original on 21 June 2011.
Jump up ^ Cole, Jeffrey (2011). Ethnic Groups of Europe: An Encyclopedia. Santa Barbara, CA: ABC-CLIO. p. 121. ISBN 1-59884-302-8.
Jump up ^ Castells, M.; Hall, P.; Hall, P.G. (2004). Technopoles of the World: the Making of Twenty-First-Century Industrial Complexes. London: Routledge. pp. 98–100. ISBN 0-415-10015-1.
Jump up ^ "Knowledge, networks and nations: scientific collaborations in the twenty-first century". Royal Society. 2011. Archived from the original on 22 June 2011.
Jump up ^ McCook, Alison. "Is peer review broken?". Reprinted from the Scientist 20(2) 26, 2006. Archived from the original on 21 June 2011.
^ Jump up to: a b "Heathrow 'needs a third runway'". BBC News. 25 June 2008. Retrieved 17 October 2008.
^ Jump up to: a b "Statistics: Top 30 World airports" (Press release). Airports Council International. July 2008. Retrieved 15 October 2008.
Jump up ^ "Transport Statistics Great Britain: 2010". Department for Transport. Archived from the original on 16 December 2010.
Jump up ^ "Major new rail lines considered". BBC News. 21 June 2008. Archived from the original on 9 October 2010.
Jump up ^ "Crossrail's giant tunnelling machines unveiled". BBC News. 2 January 2012.
Jump up ^ Leftly, Mark (29 August 2010). "Crossrail delayed to save £1bn". The Independent on Sunday (London).
^ Jump up to: a b "Size of Reporting Airports October 2009 – September 2010". Civil Aviation Authority. Retrieved 5 December 2010.
Jump up ^ "BMI being taken over by Lufthansa". BBC News. 29 October 2008. Retrieved 23 December 2009.
Jump up ^ "United Kingdom Energy Profile". U.S. Energy Information Administration. Retrieved 4 November 2010.
Jump up ^ Mason, Rowena (24 October 2009). "Let the battle begin over black gold". The Daily Telegraph (London). Retrieved 26 November 2010.
Jump up ^ Heath, Michael (26 November 2010). "RBA Says Currency Containing Prices, Rate Level 'Appropriate' in Near Term". Bloomberg (New York). Retrieved 26 November 2010.
^ Jump up to: a b c "Nuclear Power in the United Kingdom". World Nuclear Association. April 2013. Retrieved 9 April 2013.
^ Jump up to: a b c "United Kingdom – Oil". U.S. Energy Information Administration. Retrieved 4 November 2010.[dead link]
Jump up ^ "Diminishing domestic reserves, escalating imports". EDF Energy. Retrieved 9 April 2013.
^ Jump up to: a b "United Kingdom – Natural Gas". U.S. Energy Information Administration. Retrieved 4 November 2010.[dead link]
^ Jump up to: a b "United Kingdom – Quick Facts Energy Overview". U.S. Energy Information Administration. Retrieved 4 November 2010.[dead link]
Jump up ^ The Coal Authority (10 April 2006). "Coal Reserves in the United Kingdom". The Coal Authority. Archived from the original on 4 January 2009. Retrieved 5 July 2011.
Jump up ^ "England Expert predicts 'coal revolution'". BBC News. 16 October 2007. Retrieved 23 September 2008.
Jump up ^ Watts, Susan (20 March 2012). "Fracking: Concerns over gas extraction regulations". BBC News. Retrieved 9 April 2013.
Jump up ^ "Quit fracking aboot". Friends of the Earth Scotland. Retrieved 9 April 2013.
Jump up ^ "Census Geography". Office for National Statistics. 30 October 2007. Archived from the original on 4 June 2011. Retrieved 14 April 2012.
Jump up ^ "Welcome to the 2011 Census for England and Wales". Office for National Statistics. n.d. Retrieved 11 October 2008.
^ Jump up to: a b c "2011 Census: Population Estimates for the United Kingdom". Office for National Statistics. 27 March 2011. Retrieved 18 December 2012.
^ Jump up to: a b c "Annual Mid-year Population Estimates, 2010". Office for National Statistics. 2011. Retrieved 14 April 2012.
Jump up ^ Batty, David (30 December 2010). "One in six people in the UK today will live to 100, study says". The Guardian (London).
^ Jump up to: a b "2011 UK censuses". Office for National Statistics. Retrieved 18 December 2012.
Jump up ^ "Population: UK population grows to 59.6 million" (Press release). Office for National Statistics. 24 June 2004. Archived from the original on 22 July 2004. Retrieved 14 April 2012.
Jump up ^ Khan, Urmee (16 September 2008). "England is most crowded country in Europe". The Daily Telegraph (London). Retrieved 5 September 2009.
Jump up ^ Carrell, Severin (17 December 2012). "Scotland's population at record high". The Guardian. London. Retrieved 18 December 2012.
^ Jump up to: a b c "Vital Statistics: Population and Health Reference Tables (February 2014 Update): Annual Time Series Data". ONS. Retrieved 27 April 2014.
Jump up ^ Boseley, Sarah (14 July 2008). "The question: What's behind the baby boom?". The Guardian (London). p. 3. Retrieved 28 August 2009.
Jump up ^ Tables, Graphs and Maps Interface (TGM) table. Eurostat (26 February 2013). Retrieved 12 July 2013.
Jump up ^ Campbell, Denis (11 December 2005). "3.6m people in Britain are gay – official". The Observer (London). Retrieved 28 April 2013.
Jump up ^ "2011 Census - Built-up areas". ONS. Retrieved 1 July 2013.
Jump up ^ Mid-2012 Population Estimates for Settlements and Localities in Scotland General Register Office for Scotland
Jump up ^ "Belfast Metropolitan Urban Area NISRA 2005". Retrieved 28 April 2013.
Jump up ^ 2011 Census: KS201UK Ethnic group, local authorities in the United Kingdom, Accessed 21 February 2014
Jump up ^ "Welsh people could be most ancient in UK, DNA suggests". BBC News. 19 June 2012. Retrieved 28 April 2013.
Jump up ^ Thomas, Mark G. et al. "Evidence for a segregated social structure in early Anglo-Saxon England". Proceedings of the Royal Society B: Biological Sciences 273(1601): 2651–2657.
Jump up ^ Owen, James (19 July 2005). "Review of 'The Tribes of Britain'". National Geographic (Washington DC).
Jump up ^ Oppenheimer, Stephen (October 2006). "Myths of British ancestry" at the Wayback Machine (archived 26 September 2006). Prospect (London). Retrieved 5 November 2010.
Jump up ^ Henderson, Mark (23 October 2009). "Scientist – Griffin hijacked my work to make race claim about 'British aborigines'". The Times (London). Retrieved 26 October 2009. (subscription required)
Jump up ^ Costello, Ray (2001). Black Liverpool: The Early History of Britain's Oldest Black Community 1730–1918. Liverpool: Picton Press. ISBN 1-873245-07-6.
Jump up ^ "Culture and Ethnicity Differences in Liverpool – Chinese Community". Chambré Hardman Trust. Retrieved 26 October 2009.
Jump up ^ Coleman, David; Compton, Paul; Salt, John (2002). "The demographic characteristics of immigrant populations", Council of Europe, p.505. ISBN 92-871-4974-7.
Jump up ^ Mason, Chris (30 April 2008). "'Why I left UK to return to Poland'". BBC News.
Jump up ^ "Resident population estimates by ethnic group (percentages): London". Office for National Statistics. Retrieved 23 April 2008.
Jump up ^ "Resident population estimates by ethnic group (percentages): Leicester". Office for National Statistics. Retrieved 23 April 2008.
Jump up ^ "Census 2001 – Ethnicity and religion in England and Wales". Office for National Statistics. Retrieved 23 April 2008.
Jump up ^ Loveys, Kate (22 June 2011). "One in four primary school pupils are from an ethnic minority and almost a million schoolchildren do not speak English as their first language". Daily Mail (London). Retrieved 28 June 2011.
Jump up ^ Rogers, Simon (19 May 2011). "Non-white British population reaches 9.1 million". The Guardian (London).
Jump up ^ Wallop, Harry (18 May 2011). "Population growth of last decade driven by non-white British". The Daily Telegraph (London).
Jump up ^ "Official EU languages". European Commission. 8 May 2009. Retrieved 16 October 2009.
Jump up ^ "Language Courses in New York". United Nations. 2006. Retrieved 29 November 2010.
Jump up ^ "English language – Government, citizens and rights". Directgov. Retrieved 23 August 2011.
Jump up ^ "Commonwealth Secretariat – UK". Commonwealth Secretariat. Retrieved 23 August 2011.
^ Jump up to: a b c "Languages across Europe: United Kingdom". BBC. Retrieved 4 February 2013.
Jump up ^ Booth, Robert (30 January 2013). "Polish becomes England's second language". The Guardian (London). Retrieved 4 February 2012.
Jump up ^ European Charter for Regional or Minority Languages, Strasbourg, 5.XI.1992 - http://conventions.coe.int/treaty/en/Treaties/Html/148.htm
Jump up ^ Framework Convention for the Protection of National Minorities, Strasbourg, 1.II.1995 - http://conventions.coe.int/Treaty/en/Treaties/Html/157.htm
Jump up ^ National Statistics Online – Welsh Language[dead link]. National Statistics Office.
Jump up ^ "Differences in estimates of Welsh Language Skills". Office for National Statistics. Archived from the original on 12 January 2010. Retrieved 30 December 2008.
Jump up ^ Wynn Thomas, Peter (March 2007). "Welsh today". Voices. BBC. Retrieved 5 July 2011.
Jump up ^ "Scotland's Census 2001 – Gaelic Report". General Register Office for Scotland. Retrieved 28 April 2013.
Jump up ^ "Local UK languages 'taking off'". BBC News. 12 February 2009.
Jump up ^ Edwards, John R. (2010). Minority languages and group identity: cases and categories. John Benjamins. pp. 150–158. ISBN 978-90-272-1866-7. Retrieved 12 March 2011.
Jump up ^ Koch, John T. (2006). Celtic culture: a historical encyclopedia. ABC-CLIO. p. 696. ISBN 978-1-85109-440-0.
Jump up ^ "Language Data – Scots". European Bureau for Lesser-Used Languages. Archived from the original on 23 June 2007. Retrieved 2 November 2008.
Jump up ^ "Fall in compulsory language lessons". BBC News. 4 November 2004.
Jump up ^ "The School Gate for parents in Wales". BBC. Retrieved 28 April 2013.
Jump up ^ Cannon, John, ed. (2nd edn., 2009). A Dictionary of British History. Oxford University Press. p. 144. ISBN 0-19-955037-9.
Jump up ^ Field, Clive D. (November 2009). "British religion in numbers"[dead link]. BRIN Discussion Series on Religious Statistics, Discussion Paper 001. Retrieved 3 June 2011.
Jump up ^ Yilmaz, Ihsan (2005). Muslim Laws, Politics and Society in Modern Nation States: Dynamic Legal Pluralisms in England, Turkey, and Pakistan. Aldershot: Ashgate Publishing. pp. 55–6. ISBN 0-7546-4389-1.
Jump up ^ Brown, Callum G. (2006). Religion and Society in Twentieth-Century Britain. Harlow: Pearson Education. p. 291. ISBN 0-582-47289-X.
Jump up ^ Norris, Pippa; Inglehart, Ronald (2004). Sacred and Secular: Religion and Politics Worldwide. Cambridge University Press. p. 84. ISBN 0-521-83984-X.
Jump up ^ Fergusson, David (2004). Church, State and Civil Society. Cambridge University Press. p. 94. ISBN 0-521-52959-X.
Jump up ^ "UK Census 2001". National Office for Statistics. Archived from the original on 12 March 2007. Retrieved 22 April 2007.
Jump up ^ "Religious Populations". Office for National Statistics. 11 October 2004. Archived from the original on 6 June 2011.
Jump up ^ "United Kingdom: New Report Finds Only One in 10 Attend Church". News.adventist.org. 4 April 2007. Retrieved 12 September 2010.
Jump up ^ Philby, Charlotte (12 December 2012). "Less religious and more ethnically diverse: Census reveals a picture of Britain today". The Independent (London).
Jump up ^ The History of the Church of England. The Church of England. Retrieved 23 November 2008.
Jump up ^ "Queen and Church of England". British Monarchy Media Centre. Archived from the original on 8 October 2006. Retrieved 5 June 2010.
Jump up ^ "Queen and the Church". The British Monarchy (Official Website). Archived from the original on 7 June 2011.
Jump up ^ "How we are organised". Church of Scotland. Archived from the original on 7 June 2011.
Jump up ^ Weller, Paul (2005). Time for a Change: Reconfiguring Religion, State, and Society. London: Continuum. pp. 79–80. ISBN 0567084876.
Jump up ^ Peach, Ceri, "United Kingdom, a major transformation of the religious landscape", in H. Knippenberg. ed. (2005). The Changing Religious Landscape of Europe. Amsterdam: Het Spinhuis. pp. 44–58. ISBN 90-5589-248-3.
Jump up ^ Richards, Eric (2004). Britannia's children: Emigration from England, Scotland, Wales and Ireland since 1600. London: Hambledon, p. 143. ISBN 978-1-85285-441-6.
Jump up ^ Gibney, Matthew J.; Hansen, Randall (2005). Immigration and asylum: from 1900 to the present, ABC-CLIO, p. 630. ISBN 1-57607-796-9
Jump up ^ "Short history of immigration". BBC. 2005. Retrieved 28 August 2010.
Jump up ^ Rogers, Simon (11 December 2012). "Census 2011 mapped and charted: England & Wales in religion, immigration and race". London: Guardian. Retrieved 11 December 2012.
Jump up ^ 6.5% of the EU population are foreigners and 9.4% are born abroad, Eurostat, Katya Vasileva, 34/2011.
Jump up ^ Muenz, Rainer (June 2006). "Europe: Population and Migration in 2005". Migration Policy Institute. Retrieved 2 April 2007.
Jump up ^ "Immigration and births to non-British mothers pushes British population to record high". London Evening Standard. 21 August 2008.
Jump up ^ Doughty, Steve; Slack, James (3 June 2008). "Third World migrants behind our 2.3m population boom". Daily Mail (London).
Jump up ^ Bentham, Martin (20 October 2008). "Tories call for tougher control of immigration". London Evening Standard.
Jump up ^ "Minister rejects migrant cap plan". BBC News. 8 September 2008. Retrieved 26 April 2011.
Jump up ^ Johnston, Philip (5 January 2007). "Immigration 'far higher' than figures say". The Daily Telegraph (London). Retrieved 20 April 2007.
Jump up ^ Travis, Alan (25 August 2011). "UK net migration rises 21%". The Guardian (London).
^ Jump up to: a b "Migration Statistics Quarterly Report May 2012". Office for National Statistics. 24 May 2012.
Jump up ^ "Migration to UK more than double government target". BBC News. 24 May 2012.
^ Jump up to: a b "Citizenship". Home Office. August 2011. Retrieved 24 October 2011.[dead link]
Jump up ^ Bamber, David (20 December 2000). "Migrant squad to operate in France". The Daily Telegraph (London).
Jump up ^ "Settlement". Home Office. August 2011. Retrieved 24 October 2011.[dead link]
Jump up ^ "Births in England and Wales by parents' country of birth, 2011". Office for National Statistics. 30 August 2012. Retrieved 28 April 2013.
Jump up ^ "Right of Union citizens and their family members to move and reside freely within the territory of the Member States". European Commission. Retrieved 28 April 2013.
Jump up ^ Doward, Jamie; Temko, Ned (23 September 2007). "Home Office shuts the door on Bulgaria and Romania". The Observer (London). p. 2. Retrieved 23 August 2008.
Jump up ^ Sumption, Madeleine; Somerville, Will (January 2010). The UK's new Europeans: Progress and challenges five years after accession. Policy Report (London: Equality and Human Rights Commission). p. 13. ISBN 978-1-84206-252-4. Retrieved 19 January 2010.
Jump up ^ Doward, Jamie; Rogers, Sam (17 January 2010). "Young, self-reliant, educated: portrait of UK's eastern European migrants". The Observer (London). Retrieved 19 January 2010.
Jump up ^ Hopkirk, Elizabeth (20 October 2008). "Packing up for home: Poles hit by UK's economic downturn". London Evening Standard.
Jump up ^ "Migrants to UK 'returning home'". BBC News. 8 September 2009. Retrieved 8 September 2009.
Jump up ^ "UK sees shift in migration trend". BBC News. 27 May 2010. Retrieved 28 May 2010.
Jump up ^ "Fresh Talent: Working in Scotland". London: UK Border Agency. Retrieved 30 October 2010.
Jump up ^ Boxell, James (28 June 2010). "Tories begin consultation on cap for migrants". Financial Times (London). Retrieved 17 September 2010.
Jump up ^ "Vince Cable: Migrant cap is hurting economy". The Guardian (London). Press Association. 17 September 2010. Retrieved 17 September 2010.
Jump up ^ Richards (2004), pp. 6–7.
^ Jump up to: a b Sriskandarajah, Dhananjayan; Drew, Catherine (11 December 2006). "Brits Abroad: Mapping the scale and nature of British emigration". Institute for Public Policy Research. Retrieved 20 January 2007.
Jump up ^ "Brits Abroad: world overview". BBC. n.d. Retrieved 20 April 2007.
Jump up ^ Casciani, Dominic (11 December 2006). "5.5 m Britons 'opt to live abroad'". BBC News. Retrieved 20 April 2007.
Jump up ^ "Brits Abroad: Country-by-country". BBC News. 11 December 2006.
Jump up ^ "Local Authorities". Department for Children, Schools and Families. Retrieved 21 December 2008.
Jump up ^ Gordon, J.C.B. (1981). Verbal Deficit: A Critique. London: Croom Helm. p. 44 note 18. ISBN 978-0-85664-990-5.
Jump up ^ Section 8 ('Duty of local education authorities to secure provision of primary and secondary schools'), Sections 35–40 ('Compulsory attendance at Primary and Secondary Schools') and Section 61 ('Prohibition of fees in schools maintained by local education authorities ...'), Education Act 1944.
Jump up ^ "England's pupils in global top 10". BBC News. 10 December 2008.
Jump up ^ "More state pupils in universities". BBC News. 19 July 2007.
Jump up ^ MacLeod, Donald (9 November 2007). "Private school pupil numbers in decline". The Guardian (London). Retrieved 31 March 2010.
Jump up ^ Frankel, Hannah (3 September 2010). "Is Oxbridge still a preserve of the posh?". TES (London). Retrieved 9 April 2013.
Jump up ^ "World's top 100 universities 2013: their reputations ranked by Times Higher Education". The Guardian (London). 2013. Retrieved 23 October 2014.
Jump up ^ Davenport, F.; Beech, C.; Downs, T.; Hannigan, D. (2006). Ireland. Lonely Planet, 7th edn. ISBN 1-74059-968-3. p. 564.
Jump up ^ "About SQA". Scottish Qualifications Authority. 10 April 2013. Retrieved 28 April 2013.
Jump up ^ "About Learning and Teaching Scotland". Learning and Teaching Scotland. Retrieved 28 April 2013.
Jump up ^ "Brain drain in reverse". Scotland Online Gateway. July 2002. Archived from the original on 4 December 2007.
Jump up ^ "Increase in private school intake". BBC News. 17 April 2007.
Jump up ^ "MSPs vote to scrap endowment fee". BBC News. 28 February 2008.
Jump up ^ What will your child learn?[dead link] The Welsh Assembly Government. Retrieved 22 January 2010.
Jump up ^ CCEA. "About Us – What we do". Council for the Curriculum Examinations & Assessment. Retrieved 28 April 2013.
Jump up ^ Elitist Britain?, Social Mobility and Child Poverty Commission, 28 August 2014
Jump up ^ Arnett, George (28 August 2014). "Elitism in Britain - breakdown by profession". The Guardian: Datablog.
Jump up ^ Haden, Angela; Campanini, Barbara, eds. (2000). The world health report 2000 – Health systems: improving performance. Geneva: World Health Organisation. ISBN 92-4-156198-X. Retrieved 5 July 2011.
Jump up ^ World Health Organization. "Measuring overall health system performance for 191 countries". New York University. Retrieved 5 July 2011.
Jump up ^ "'Huge contrasts' in devolved NHS". BBC News. 28 August 2008.
Jump up ^ Triggle, Nick (2 January 2008). "NHS now four different systems". BBC News.
Jump up ^ Fisher, Peter. "The NHS from Thatcher to Blair". NHS Consultants Association (International Association of Health Policy). The Budget ... was even more generous to the NHS than had been expected amounting to an annual rise of 7.4% above the rate of inflation for the next 5 years. This would take us to 9.4% of GDP spent on health ie around EU average.
Jump up ^ "OECD Health Data 2009 – How Does the United Kingdom Compare". Paris: Organisation for Economic Co-operation and Development. Retrieved 28 April 2013.[dead link]
Jump up ^ "The cultural superpower: British cultural projection abroad". Journal of the British Politics Society, Norway. Volume 6. No. 1. Winter 2011
Jump up ^ Sheridan, Greg (15 May 2010). "Cameron has chance to make UK great again". The Australian (Sydney). Retrieved 20 May 2012.
Jump up ^ Goldfarb, Jeffrey (10 May 2006). "Bookish Britain overtakes America as top publisher". RedOrbit (Texas). Reuters.
Jump up ^ "William Shakespeare (English author)". Britannica Online encyclopedia. Retrieved 26 February 2006.
Jump up ^ MSN Encarta Encyclopedia article on Shakespeare. Archived from the original on 9 February 2006. Retrieved 26 February 2006.
Jump up ^ William Shakespeare. Columbia Electronic Encyclopedia. Retrieved 26 February 2006.
Jump up ^ "Mystery of Christie's success is solved". The Daily Telegraph (London). 19 December 2005. Retrieved 14 November 2010.
Jump up ^ "All-Time Essential Comics". IGN. Retrieved 15 August 2013.
Jump up ^ Johnston, Rich. "Before Watchmen To Double Up For Hardcover Collections". Bleeding Cool. 10 December 2012. Retrieved 15 August 2013.
Jump up ^ "Edinburgh, UK appointed first UNESCO City of Literature". Unesco. 2004. Retrieved 28 April 2013.[dead link]
Jump up ^ "Early Welsh poetry". BBC Wales. Retrieved 29 December 2010.
Jump up ^ Lang, Andrew (2003) [1913]. History of English Literature from Beowulf to Swinburne. Holicong, PA: Wildside Press. p. 42. ISBN 978-0-8095-3229-2.
Jump up ^ "Dafydd ap Gwilym". Academi website. Academi. 2011. Retrieved 3 January 2011. Dafydd ap Gwilym is widely regarded as one of the greatest Welsh poets of all time, and amongst the leading European poets of the Middle Ages.
Jump up ^ True birthplace of Wales's literary hero. BBC News. Retrieved 28 April 2012
Jump up ^ Kate Roberts: Biography at the Wayback Machine. BBC Wales. Retrieved 28 April 2012
Jump up ^ Swift, Jonathan; Fox, Christopher (1995). Gulliver's travels: complete, authoritative text with biographical and historical contexts, critical history, and essays from five contemporary critical perspectives. Basingstoke: Macmillan. p. 10. ISBN 978-0-333-63438-7.
Jump up ^ "Bram Stoker." (PDF). The New York Times. 23 April 1912. Retrieved 1 January 2011.
^ Jump up to: a b "1960–1969". EMI Group. Retrieved 31 May 2008.
^ Jump up to: a b "Paul At Fifty". Time (New York). 8 June 1992.
^ Jump up to: a b Most Successful Group The Guinness Book of Records 1999, p. 230. Retrieved 19 March 2011.
Jump up ^ "British Citizen by Act of Parliament: George Frideric Handel". UK Parliament. 20 July 2009. Retrieved 11 September 2009.[dead link]
Jump up ^ Andrews, John (14 April 2006). "Handel all'inglese". Playbill (New York). Retrieved 11 September 2009.
Jump up ^ Citron, Stephen (2001). Sondheim and Lloyd-Webber: The new musical. London: Chatto & Windus. ISBN 978-1-85619-273-6.
Jump up ^ "Beatles a big hit with downloads". Belfast Telegraph. 25 November 2010. Retrieved 16 May 2011.
Jump up ^ "British rock legends get their own music title for PlayStation3 and PlayStation2" (Press release). EMI. 2 February 2009.
Jump up ^ Khan, Urmee (17 July 2008). "Sir Elton John honoured in Ben and Jerry ice cream". The Daily Telegraph (London).
Jump up ^ Alleyne, Richard (19 April 2008). "Rock group Led Zeppelin to reunite". The Daily Telegraph (London). Retrieved 31 March 2010.
Jump up ^ Fresco, Adam (11 July 2006). "Pink Floyd founder Syd Barrett dies at home". The Times (London). Retrieved 31 March 2010. (subscription required)
Jump up ^ Holton, Kate (17 January 2008). "Rolling Stones sign Universal album deal". Reuters. Retrieved 26 October 2008.
Jump up ^ Walker, Tim (12 May 2008). "Jive talkin': Why Robin Gibb wants more respect for the Bee Gees". The Independent (London). Retrieved 26 October 2008.
Jump up ^ "Brit awards winners list 2012: every winner since 1977". The Guardian (London). Retrieved 28 February 2012.
Jump up ^ Corner, Lewis (16 February 2012). "Adele, Coldplay biggest-selling UK artists worldwide in 2011". Digital Spy. Retrieved 22 March 2012.
Jump up ^ Hughes, Mark (14 January 2008). "A tale of two cities of culture: Liverpool vs Stavanger". The Independent (London). Retrieved 2 August 2009.
Jump up ^ "Glasgow gets city of music honour". BBC News. 20 August 2008. Retrieved 2 August 2009.
Jump up ^ Bayley, Stephen (24 April 2010). "The startling success of Tate Modern". The Times (London). Retrieved 19 January 2011. (subscription required)
Jump up ^ "Vertigo is named 'greatest film of all time'". BBC News. 2 August 2012. Retrieved 18 August 2012.
Jump up ^ "The Directors' Top Ten Directors". British Film Institute. Archived from the original on 27 May 2012.
Jump up ^ "Chaplin, Charles (1889–1977)". British Film Institute. Retrieved 25 January 2011.
Jump up ^ "Powell, Michael (1905–1990)". British Film Institute. Retrieved 25 January 2011.
Jump up ^ "Reed, Carol (1906–1976)". British Film Institute. Retrieved 25 January 2011.
Jump up ^ "Scott, Sir Ridley (1937–)". British Film Institute. Retrieved 25 January 2011.
Jump up ^ "Andrews, Julie (1935–)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Burton, Richard (1925–1984)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Caine, Michael (1933–)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Chaplin, Charles (1889–1977)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Connery, Sean (1930–)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Leigh, Vivien (1913–1967)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Niven, David (1910–1983)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Olivier, Laurence (1907–1989)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Sellers, Peter (1925–1980)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Winslet, Kate (1975–)". British Film Institute. Retrieved 11 December 2010.
Jump up ^ "Daniel Day-Lewis makes Oscar history with third award". BBC News. Retrieved 15 August 2013
Jump up ^ "Harry Potter becomes highest-grossing film franchise". The Guardian (London). 11 September 2007. Retrieved 2 November 2010.
Jump up ^ "History of Ealing Studios". Ealing Studios. Retrieved 5 June 2010.
^ Jump up to: a b "UK film – the vital statistics". UK Film Council. Retrieved 22 October 2010.[dead link]
Jump up ^ "The BFI 100". British Film Institute. 6 September 2006. Archived from the original on 1 April 2011.
Jump up ^ "Baftas fuel Oscars race". BBC News. 26 February 2001. Retrieved 14 February 2011.
^ Jump up to: a b "BBC: World's largest broadcaster & Most trusted media brand". Media Newsline. Archived from the original on 5 October 2010. Retrieved 23 September 2010.
^ Jump up to: a b "Digital licence". Prospect. Retrieved 23 September 2010.
^ Jump up to: a b "About the BBC – What is the BBC". BBC Online. Retrieved 23 September 2010.
Jump up ^ Newswire7 (13 August 2009). "BBC: World's largest broadcaster & Most trusted media brand". Media Newsline. Archived from the original on 17 June 2011.
Jump up ^ "TV Licence Fee: facts & figures". BBC Press Office. April 2010. Archived from the original on 17 June 2011.
Jump up ^ "Publications & Policies: The History of ITV". ITV.com. Archived from the original on 17 June 2011.
Jump up ^ "Publishing". News Corporation. Archived from the original on 17 June 2011.
Jump up ^ "Direct Broadcast Satellite Television". News Corporation. Archived from the original on 17 June 2011.
Jump up ^ William, D. (2010). UK Cities: A Look at Life and Major Cities in England, Scotland, Wales and Northern Ireland. Eastbourne: Gardners Books. ISBN 978-9987-16-021-1, pp. 22, 46, 109 and 145.
Jump up ^ "Publishing". Department of Culture, Media and Sport. Archived from the original on 17 June 2011.
Jump up ^ Ofcom "Communication Market Report 2010", 19 August 2010, pp. 97, 164 and 191
Jump up ^ "Social Trends: Lifestyles and social participation". Office for National Statistics. 16 February 2010. Archived from the original on 17 June 2011.
Jump up ^ "Top 20 countries with the highest number of Internet users". Internet World Stats. Archived from the original on 17 June 2011.
Jump up ^ Fieser, James, ed. (2000). A bibliography of Scottish common sense philosophy: Sources and origins. Bristol: Thoemmes Press. Retrieved 17 December 2010.
Jump up ^ Palmer, Michael (1999). Moral Problems in Medicine: A Practical Coursebook. Cambridge: Lutterworth Press. p. 66. ISBN 978-0-7188-2978-0.
Jump up ^ Scarre, Geoffrey (1995). Utilitarianism. London: Routledge. p. 82. ISBN 978-0-415-12197-2.
Jump up ^ Gysin, Christian (9 March 2007). "Wembley kick-off: Stadium is ready and England play first game in fortnight". Daily Mail (London). Retrieved 19 March 2007.
Jump up ^ "Opening ceremony of the games of the XXX Olympiad". Olympic.org. Retrieved 30 November 2013.
Jump up ^ "Unparalleled Sporting History". Reuters. Retrieved 30 November 2013.
Jump up ^ "Rugby Union 'Britain's Second Most Popular Sport'". Ipsos-Mori. 22 December 2003. Retrieved 28 April 2013.
Jump up ^ Ebner, Sarah (2 July 2013). "History and time are key to power of football, says Premier League chief". The Times (London). Retrieved 30 November 2013.
Jump up ^ Mitchell, Paul (November 2005). "The first international football match". BBC Sport Scotland. Retrieved 15 December 2013.
Jump up ^ "Why is there no GB Olympics football team?". BBC Sport. 5 August 2008. Retrieved 31 December 2010.
Jump up ^ "Blatter against British 2012 team". BBC News. 9 March 2008. Retrieved 2 April 2008.
Jump up ^ "About ECB". England and Wales Cricket Board. n.d. Retrieved 28 April 2013.
Jump up ^ McLaughlin, Martyn (4 August 2009). "Howzat happen? England fields a Gaelic-speaking Scotsman in Ashes". The Scotsman (Edinburgh). Retrieved 30 December 2010.
Jump up ^ "Uncapped Joyce wins Ashes call up". BBC Sport. 15 November 2006. Retrieved 30 December 2010.
Jump up ^ "Glamorgan". BBC South East Wales. August 2009. Retrieved 30 December 2010.
Jump up ^ Ardener, Shirley (2007). Professional identities: policy and practice in business and bureaucracy. New York: Berghahn. p. 27. ISBN 978-1-84545-054-0.
Jump up ^ "Official Website of Rugby League World Cup 2008". Archived from the original on 16 October 2007.
Jump up ^ Louw, Jaco; Nesbit, Derrick (2008). The Girlfriends Guide to Rugby. Johannesburg: South Publishers. ISBN 978-0-620-39541-0.
Jump up ^ "Triple Crown". RBS 6 Nations. Retrieved 6 March 2011.
Jump up ^ "Tracking the Field". Ipsos MORI. Archived from the original on 5 February 2009. Retrieved 17 October 2008.
Jump up ^ "Links plays into the record books". BBC News. 17 March 2009.
Jump up ^ Chowdhury, Saj (22 January 2007). "China in Ding's hands". BBC Sport. Retrieved 2 January 2011.
Jump up ^ "Lawn Tennis and Major T.Gem". The Birmingham Civic Society. Archived from the original on 18 August 2011. Retrieved 31 December 2010.
Jump up ^ Gould, Joe (10 April 2007). "The ancient Irish sport of hurling catches on in America". Columbia News Service (Columbia Journalism School). Retrieved 17 May 2011.
Jump up ^ "Shinty". Scottishsport.co.uk. Retrieved 28 April 2013.
Jump up ^ "Welsh dragon call for Union flag". BBC News. 27 November 2007. Retrieved 17 October 2008.
Jump up ^ "Britannia on British Coins". Chard. Retrieved 25 June 2006.
Jump up ^ Baker, Steve (2001). Picturing the Beast. University of Illinois Press. p. 52. ISBN 0-252-07030-5.
Further reading
Hitchens, Peter (2000). The Abolition of Britain: from Winston Churchill to Princess Diana. Second ed. San Francisco, Calif.: Encounter Books. xi, 332 p. ISBN 1-893554-18-X.
Lambert, Richard S. (1964). The Great Heritage: a History of Britain for Canadians. House of Grant, 1964 (and earlier editions and/or printings).
External links
Find more about
United Kingdom
at Wikipedia's sister projects
Search Wiktionary Definitions from Wiktionary
Search Commons Media from Commons
Search Wikinews News stories from Wikinews
Search Wikiquote Quotations from Wikiquote
Search Wikisource Source texts from Wikisource
Search Wikibooks Textbooks from Wikibooks
Search Wikivoyage Travel guide from Wikivoyage
Search Wikiversity Learning resources from Wikiversity
Government
Official website of HM Government
Official website of the British Monarchy
Official Yearbook of the United Kingdom statistics
The official site of the British Prime Minister's Office
General information
United Kingdom from the BBC News
United Kingdom entry at The World Factbook
United Kingdom from UCB Libraries GovPubs
United Kingdom at DMOZ
United Kingdom Encyclopædia Britannica entry
United Kingdom from the OECD
United Kingdom at the EU
Wikimedia Atlas of United Kingdom
Geographic data related to United Kingdom at OpenStreetMap
Key Development Forecasts for the United Kingdom from International Futures
Travel
Official tourist guide to Britain
[hide] v t e
United Kingdom topics
History
Chronology
Formation Georgian era Victorian era Edwardian era World War I Interwar World War II UK since 1945 (Postwar Britain)
By topic
Economic Empire Maritime Military
Geography
Administrative
Countries of the United Kingdom Crown dependencies Overseas territories City status Towns Former colonies
Physical
British Isles terminology Great Britain Geology Northern Ireland Lakes and lochs Mountains Rivers Volcanoes
Resources
Energy/Renewable energy Biodiesel Coal Geothermal Hydraulic frac. Hydroelectricity Marine North Sea oil Solar Wind Food Agriculture Fishing English Scottish Hunting Materials Flora Forestry Mining
Politics
Constitution Courts Elections Foreign relations Judiciary Law Law enforcement Legislation Monarchy monarchs Nationality Parliament House of Commons House of Lords Political parties
Government
Cabinet list Civil service Departments Prime Minister list
Military
Royal Navy Army Royal Air Force Weapons of mass destruction
Economy
Banks Bank of England Budget Economic geography Pound (currency) Stock Exchange Taxation Telecommunications Tourism Transport
Society
Affordability of housing Crime Demography Drug policy Education Ethnic groups Health care Immigration Languages Poverty Food banks Prostitution Public holidays Social care Social structure
Culture
Art Cinema Cuisine Identity Literature Media television Music Religion Sport Symbols Theatre
[show]
Countries of the United Kingdom
Outline Index
Book Category Portal WikiProject
[show]
Gnome-globe.svg Geographic locale
[show] v t e
Member states of the European Union
[show]
International organisations
[show] v t e
English-speaking world
[show] v t e
National personifications
Coordinates: 55°N 3°W
Categories: United Kingdom | British Islands | Constitutional monarchies | Countries in Europe | English-speaking countries and territories | G20 nations | G7 nations | G8 nations | Island countries | Liberal democracies | Member states of NATO | Member states of the Commonwealth of Nations | Member states of the Council of Europe | Member states of the European Union | Member states of the Union for the Mediterranean | Member states of the United Nations | Northern Europe | Western Europe
Navigation menu
Create account | Log in | Article | Talk | Read | View source | View history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
Адыгэбзэ
Afrikaans
Akan
Alemannisch
አማርኛ
Ænglisc
Аҧсшәа
العربية
Aragonés
ܐܪܡܝܐ
Armãneashti
Arpetan
Asturianu
Avañe'ẽ
Авар
Azərbaycanca
বাংলা
Bahasa Banjar
Bân-lâm-gú
Башҡортса
Беларуская
Беларуская (тарашкевіца)
भोजपुरी
Bikol Central
Bislama
Български
Boarisch
བོད་ཡིག
Bosanski
Brezhoneg
Буряад
Català
Чӑвашла
Cebuano
Čeština
Chavacano de Zamboanga
ChiShona
Corsu
Cymraeg
Dansk
Deutsch
ދިވެހިބަސް
Diné bizaad
Dolnoserbski
ཇོང་ཁ
Eesti
Ελληνικά
Emiliàn e rumagnòl
Español
Esperanto
Estremeñu
Euskara
فارسی
Fiji Hindi
Føroyskt
Français
Frysk
Furlan
Gaeilge
Gaelg
Gagauz
Gàidhlig
Galego
贛語
ગુજરાતી
客家語/Hak-kâ-ngî
Хальмг
한국어
Hausa
Hawaii
Հայերեն
हिन्दी
Hornjoserbsce
Hrvatski
Ido
Igbo
Ilokano
বিষ্ণুপ্রিয়া মণিপুরী
Bahasa Indonesia
Interlingua
Interlingue
Ирон
IsiZulu
Íslenska
Italiano
עברית
Basa Jawa
Kalaallisut
ಕನ್ನಡ
Kapampangan
Къарачай-малкъар
ქართული
Kaszëbsczi
Қазақша
Kernowek
Kinyarwanda
Kiswahili
Коми
Kongo
Kreyòl ayisyen
Kurdî
Кыргызча
Кырык мары
Ladino
Лезги
ລາວ
Latgaļu
Latina
Latviešu
Lëtzebuergesch
Lietuvių
Ligure
Limburgs
Lingála
Lojban
Lumbaart
Magyar
Македонски
Malagasy
മലയാളം
Malti
Māori
मराठी
მარგალური
مصرى
مازِرونی
Bahasa Melayu
Mìng-dĕ̤ng-ngṳ̄
Mirandés
Монгол
မြန်မာဘာသာ
Nāhuatl
Dorerin Naoero
Nederlands
Nedersaksies
नेपाली
नेपाल भाषा
日本語
Napulitano
Нохчийн
Nordfriisk
Norfuk / Pitkern
Norsk bokmål
Norsk nynorsk
Nouormand
Novial
Occitan
Олык марий
ଓଡ଼ିଆ
Oromoo
Oʻzbekcha
ਪੰਜਾਬੀ
Pangasinan
پنجابی
Papiamentu
پښتو
Перем Коми
ភាសាខ្មែរ
Picard
Piemontèis
Tok Pisin
Plattdüütsch
Polski
Ποντιακά
Português
Qırımtatarca
Reo tahiti
Ripoarisch
Română
Romani
Rumantsch
Runa Simi
Русиньскый
Русский
Саха тыла
Sámegiella
संस्कृतम्
Sardu
Scots
Seeltersk
Shqip
Sicilianu
සිංහල
Simple English
SiSwati
Slovenčina
Slovenščina
Словѣньскъ / ⰔⰎⰑⰂⰡⰐⰠⰔⰍⰟ
Ślůnski
Soomaaliga
کوردی
Sranantongo
Српски / srpski
Srpskohrvatski / српскохрватски
Basa Sunda
Suomi
Svenska
Tagalog
தமிழ்
Taqbaylit
Tarandíne
Татарча/tatarça
తెలుగు
Tetun
ไทย
Тоҷикӣ
ᏣᎳᎩ
Tsetsêhestâhese
Türkçe
Twi
Удмурт
ᨅᨔ ᨕᨘᨁᨗ
Українська
اردو
ئۇيغۇرچە / Uyghurche
Vahcuengh
Vèneto
Vepsän kel’
Tiếng Việt
Volapük
Võro
Walon
文言
West-Vlams
Winaray
Wolof
吴语
ייִדיש
Yorùbá
粵語
Zazaki
Zeêuws
Žemaitėška
中文
Edit links
This page was last modified on 22 November 2014 at 11:19.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policy | About Wikipedia | Disclaimers | Contact Wikipedia | Developers | Mobile view | Wikimedia Foundation | Powered by MediaWiki
World Trade Organization
From Wikipedia, the free encyclopedia
"WTO" redirects here. For other uses, see WTO (disambiguation).
World Trade Organization (English)
Organisation mondiale du commerce (French)
Organización Mundial del Comercio (Spanish)
World Trade Organization (logo and wordmark).svg
Official logo of WTO
WTO members and observers.svg
Members
Members, dually represented by the EU
Observers
Non-members
Abbreviation WTO
Formation 1 January 1995; 19 years ago
Type International trade organization
Purpose Liberalize international trade
Headquarters Centre William Rappard, Geneva, Switzerland
Coordinates 46.12°N 6.09°E
Region served Worldwide
Membership 160 member states[1]
Official language English, French, Spanish[2]
Director-General Roberto Azevêdo
Budget 196 million Swiss francs (approx. 209 million US$) in 2011.[3]
Staff 640[4]
Website www.wto.org
The World Trade Organization (WTO) is an organization that intends to supervise and liberalize international trade. The organization officially commenced on 1 January 1995 under the Marrakech Agreement, replacing the General Agreement on Tariffs and Trade (GATT), which commenced in 1948.[5] The organization deals with regulation of trade between participating countries by providing a framework for negotiating and formalizing trade agreements and a dispute resolution process aimed at enforcing participants' adherence to WTO agreements, which are signed by representatives of member governments[6]:fol.9–10 and ratified by their parliaments.[7] Most of the issues that the WTO focuses on derive from previous trade negotiations, especially from the Uruguay Round (1986–1994).
The organization is attempting to complete negotiations on the Doha Development Round, which was launched in 2001 with an explicit focus on addressing the needs of developing countries. As of June 2012, the future of the Doha Round remained uncertain: the work programme lists 21 subjects in which the original deadline of 1 January 2005 was missed, and the round is still incomplete.[8] The conflict between free trade on industrial goods and services but retention of protectionism on farm subsidies to domestic agricultural sector (requested by developed countries) and the substantiation of the international liberalization of fair trade on agricultural products (requested by developing countries) remain the major obstacles. These points of contention have hindered any progress to launch new WTO negotiations beyond the Doha Development Round. As a result of this impasse, there has been an increasing number of bilateral free trade agreements signed.[9] As of July 2012, there were various negotiation groups in the WTO system for the current agricultural trade negotiation which is in the condition of stalemate.[10]
WTO's current Director-General is Roberto Azevêdo,[11][12] who leads a staff of over 600 people in Geneva, Switzerland.[13] A trade facilitation agreement known as the Bali Package was reached by all members on 7 December 2013, the first comprehensive agreement in the organization's history.[14][15]
Contents [hide]
1 History
1.1 GATT rounds of negotiations
1.1.1 From Geneva to Tokyo
1.1.2 Uruguay Round
1.2 Ministerial conferences
1.3 Doha Round (Doha Agenda)
2 Functions
3 Principles of the trading system
4 Organizational structure
5 Decision-making
6 Dispute settlement
7 Accession and membership
7.1 Accession process
7.2 Members and observers
8 Agreements
9 Office of director-general
9.1 List of directors-general
10 See also
11 Notes and references
12 External links
History
The economists Harry White (left) and John Maynard Keynes at the Bretton Woods Conference. Both had been strong advocates of a central-controlled international trade environment and recommended the establishment of three institutions: the IMF (for fiscal and monetary issues); the World Bank (for financial and structural issues); and the ITO (for international economic cooperation).[16]
The WTO's predecessor, the General Agreement on Tariffs and Trade (GATT), was established after World War II in the wake of other new multilateral institutions dedicated to international economic cooperation – notably the Bretton Woods institutions known as the World Bank and the International Monetary Fund. A comparable international institution for trade, named the International Trade Organization, was successfully negotiated. The ITO was to be a United Nations specialized agency and would address not only trade barriers but other issues indirectly related to trade, including employment, investment, restrictive business practices, and commodity agreements. But the ITO treaty was not approved by the U.S. and a few other signatories and never went into effect.[17][18][19]
In the absence of an international organization for trade, the GATT would over the years "transform itself" into a de facto international organization.[20]
GATT rounds of negotiations
See also: General Agreement on Tariffs and Trade
The GATT was the only multilateral instrument governing international trade from 1946 until the WTO was established on 1 January 1995.[21] Despite attempts in the mid-1950s and 1960s to create some form of institutional mechanism for international trade, the GATT continued to operate for almost half a century as a semi-institutionalized multilateral treaty regime on a provisional basis.[22]
From Geneva to Tokyo
Seven rounds of negotiations occurred under GATT. The first real GATT trade rounds concentrated on further reducing tariffs. Then, the Kennedy Round in the mid-sixties brought about a GATT anti-dumping Agreement and a section on development. The Tokyo Round during the seventies was the first major attempt to tackle trade barriers that do not take the form of tariffs, and to improve the system, adopting a series of agreements on non-tariff barriers, which in some cases interpreted existing GATT rules, and in others broke entirely new ground. Because these plurilateral agreements were not accepted by the full GATT membership, they were often informally called "codes". Several of these codes were amended in the Uruguay Round, and turned into multilateral commitments accepted by all WTO members. Only four remained plurilateral (those on government procurement, bovine meat, civil aircraft and dairy products), but in 1997 WTO members agreed to terminate the bovine meat and dairy agreements, leaving only two.[21]
Uruguay Round
Main article: Uruguay Round
During the Doha Round, the US government blamed Brazil and India for being inflexible and the EU for impeding agricultural imports.[23] The then-President of Brazil, Luiz Inácio Lula da Silva (above right), responded to the criticisms by arguing that progress would only be achieved if the richest countries (especially the US and countries in the EU) made deeper cuts in agricultural subsidies and further opened their markets for agricultural goods.[24]
Well before GATT's 40th anniversary, its members concluded that the GATT system was straining to adapt to a new globalizing world economy.[25][26] In response to the problems identified in the 1982 Ministerial Declaration (structural deficiencies, spill-over impacts of certain countries' policies on world trade GATT could not manage etc.), the eighth GATT round – known as the Uruguay Round – was launched in September 1986, in Punta del Este, Uruguay.[25]
It was the biggest negotiating mandate on trade ever agreed: the talks were going to extend the trading system into several new areas, notably trade in services and intellectual property, and to reform trade in the sensitive sectors of agriculture and textiles; all the original GATT articles were up for review.[26] The Final Act concluding the Uruguay Round and officially establishing the WTO regime was signed 15 April 1994, during the ministerial meeting at Marrakesh, Morocco, and hence is known as the Marrakesh Agreement.[27]
The GATT still exists as the WTO's umbrella treaty for trade in goods, updated as a result of the Uruguay Round negotiations (a distinction is made between GATT 1994, the updated parts of GATT, and GATT 1947, the original agreement which is still the heart of GATT 1994).[25] GATT 1994 is not however the only legally binding agreement included via the Final Act at Marrakesh; a long list of about 60 agreements, annexes, decisions and understandings was adopted. The agreements fall into a structure with six main parts:
The Agreement Establishing the WTO
Goods and investment – the Multilateral Agreements on Trade in Goods including the GATT 1994 and the Trade Related Investment Measures (TRIMS)
Services — the General Agreement on Trade in Services
Intellectual property – the Agreement on Trade-Related Aspects of Intellectual Property Rights (TRIPS)
Dispute settlement (DSU)
Reviews of governments' trade policies (TPRM)[28]
In terms of the WTO's principle relating to tariff "ceiling-binding" (No. 3), the Uruguay Round has been successful in increasing binding commitments by both developed and developing countries, as may be seen in the percentages of tariffs bound before and after the 1986–1994 talks.[29]
Ministerial conferences
The World Trade Organization Ministerial Conference of 1998, in the Palace of Nations (Geneva, Switzerland).
The highest decision-making body of the WTO is the Ministerial Conference, which usually meets every two years. It brings together all members of the WTO, all of which are countries or customs unions. The Ministerial Conference can take decisions on all matters under any of the multilateral trade agreements. The inaugural ministerial conference was held in Singapore in 1996. Disagreements between largely developed and developing economies emerged during this conference over four issues initiated by this conference, which led to them being collectively referred to as the "Singapore issues". The second ministerial conference was held in Geneva in Switzerland. The third conference in Seattle, Washington ended in failure, with massive demonstrations and police and National Guard crowd-control efforts drawing worldwide attention. The fourth ministerial conference was held in Doha in the Persian Gulf nation of Qatar. The Doha Development Round was launched at the conference. The conference also approved the joining of China, which became the 143rd member to join. The fifth ministerial conference was held in Cancún, Mexico, aiming at forging agreement on the Doha round. An alliance of 22 southern states, the G20 developing nations (led by India, China,[30] Brazil, ASEAN led by the Philippines), resisted demands from the North for agreements on the so-called "Singapore issues" and called for an end to agricultural subsidies within the EU and the US. The talks broke down without progress.
The sixth WTO ministerial conference was held in Hong Kong from 13–18 December 2005. It was considered vital if the four-year-old Doha Development Round negotiations were to move forward sufficiently to conclude the round in 2006. In this meeting, countries agreed to phase out all their agricultural export subsidies by the end of 2013, and terminate any cotton export subsidies by the end of 2006. Further concessions to developing countries included an agreement to introduce duty-free, tariff-free access for goods from the Least Developed Countries, following the Everything but Arms initiative of the European Union — but with up to 3% of tariff lines exempted. Other major issues were left for further negotiation to be completed by the end of 2010. The WTO General Council, on 26 May 2009, agreed to hold a seventh WTO ministerial conference session in Geneva from 30 November – 3 December 2009. A statement by chairman Amb. Mario Matus acknowledged that the prime purpose was to remedy a breach of protocol requiring two-yearly "regular" meetings, which had lapsed with the Doha Round failure in 2005, and that the "scaled-down" meeting would not be a negotiating session, but "emphasis will be on transparency and open discussion rather than on small group processes and informal negotiating structures". The general theme for discussion was "The WTO, the Multilateral Trading System and the Current Global Economic Environment".[31]
Doha Round (Doha Agenda)
Main article: Doha Development Round
The Doha Development Round, started in 2001, is at an impasse.
The WTO launched the current round of negotiations, the Doha Development Round, at the fourth ministerial conference in Doha, Qatar in November 2001. This was to be an ambitious effort to make globalization more inclusive and help the world's poor, particularly by slashing barriers and subsidies in farming.[32] The initial agenda comprised both further trade liberalization and new rule-making, underpinned by commitments to strengthen substantial assistance to developing countries.[33]
The negotiations have been highly contentious. Disagreements still continue over several key areas including agriculture subsidies, which emerged as critical in July 2006.[34] According to a European Union statement, "The 2008 Ministerial meeting broke down over a disagreement between exporters of agricultural bulk commodities and countries with large numbers of subsistence farmers on the precise terms of a 'special safeguard measure' to protect farmers from surges in imports."[35] The position of the European Commission is that "The successful conclusion of the Doha negotiations would confirm the central role of multilateral liberalisation and rule-making. It would confirm the WTO as a powerful shield against protectionist backsliding."[33] An impasse remains and, as of August 2013, agreement has not been reached, despite intense negotiations at several ministerial conferences and at other sessions. On 27 March 2013, the chairman of agriculture talks announced "a proposal to loosen price support disciplines for developing countries' public stocks and domestic food aid." He added: "...we are not yet close to agreement—in fact, the substantive discussion of the proposal is only beginning."[36]
[show]v · t · eGATT and WTO trade rounds[37]
Functions
Among the various functions of the WTO, these are regarded by analysts as the most important:
It oversees the implementation, administration and operation of the covered agreements.[38][39]
It provides a forum for negotiations and for settling disputes.[40][41]
Additionally, it is the WTO's duty to review and propagate the national trade policies, and to ensure the coherence and transparency of trade policies through surveillance in global economic policy-making.[39][41] Another priority of the WTO is the assistance of developing, least-developed and low-income countries in transition to adjust to WTO rules and disciplines through technical cooperation and training.[42]
(i) The WTO shall facilitate the implementation, administration and operation and further the objectives of this Agreement and of the Multilateral Trade Agreements, and shall also provide the framework for the implementation, administration and operation of the Multilateral Trade Agreements.
(ii) The WTO shall provide the forum for negotiations among its members concerning their multilateral trade relations in matters dealt with under the Agreement in the Annexes to this Agreement.
(iii) The WTO shall administer the Understanding on Rules and Procedures Governing the Settlement of Disputes.
(iv) The WTO shall administer the Trade Policy Review Mechanism.
(v) With a view to achieving greater coherence in global economic policy making, the WTO shall cooperate, as appropriate, with the International Monetary Fund (IMF) and with the International Bank for Reconstruction and Development (IBRD) and its affiliated agencies.[43]
The above five listings are the additional functions of the World Trade Organization. As globalization proceeds in today's society, the necessity of an International Organization to manage the trading systems has been of vital importance. As the trade volume increases, issues such as protectionism, trade barriers, subsidies, violation of intellectual property arise due to the differences in the trading rules of every nation. The World Trade Organization serves as the mediator between the nations when such problems arise. WTO could be referred to as the product of globalization and also as one of the most important organizations in today's globalized society.
The WTO is also a center of economic research and analysis: regular assessments of the global trade picture in its annual publications and research reports on specific topics are produced by the organization.[44] Finally, the WTO cooperates closely with the two other components of the Bretton Woods system, the IMF and the World Bank.[40]
Principles of the trading system
The WTO establishes a framework for trade policies; it does not define or specify outcomes. That is, it is concerned with setting the rules of the trade policy games.[45] Five principles are of particular importance in understanding both the pre-1994 GATT and the WTO:
Non-discrimination. It has two major components: the most favoured nation (MFN) rule, and the national treatment policy. Both are embedded in the main WTO rules on goods, services, and intellectual property, but their precise scope and nature differ across these areas. The MFN rule requires that a WTO member must apply the same conditions on all trade with other WTO members, i.e. a WTO member has to grant the most favorable conditions under which it allows trade in a certain product type to all other WTO members.[45] "Grant someone a special favour and you have to do the same for all other WTO members."[29] National treatment means that imported goods should be treated no less favorably than domestically produced goods (at least after the foreign goods have entered the market) and was introduced to tackle non-tariff barriers to trade (e.g. technical standards and security standards that discriminate against imported goods).[45]
Reciprocity. It reflects both a desire to limit the scope of free-riding that may arise because of the MFN rule, and a desire to obtain better access to foreign markets. A related point is that for a nation to negotiate, it is necessary that the gain from doing so be greater than the gain available from unilateral liberalization; reciprocal concessions intend to ensure that such gains will materialise.[46]
Binding and enforceable commitments. The tariff commitments made by WTO members in a multilateral trade negotiation and on accession are enumerated in a schedule (list) of concessions. These schedules establish "ceiling bindings": a country can change its bindings, but only after negotiating with its trading partners, which could mean compensating them for loss of trade. If satisfaction is not obtained, the complaining country may invoke the WTO dispute settlement procedures.[29][46]
Transparency. The WTO members are required to publish their trade regulations, to maintain institutions allowing for the review of administrative decisions affecting trade, to respond to requests for information by other members, and to notify changes in trade policies to the WTO. These internal transparency requirements are supplemented and facilitated by periodic country-specific reports (trade policy reviews) through the Trade Policy Review Mechanism (TPRM).[47] The WTO system tries also to improve predictability and stability, discouraging the use of quotas and other measures used to set limits on quantities of imports.[29]
Safety valves. In specific circumstances, governments are able to restrict trade. The WTO's agreements permit members to take measures to protect not only the environment but also public health, animal health and plant health.[48]
There are three types of provision in this direction:
articles allowing for the use of trade measures to attain non-economic objectives;
articles aimed at ensuring "fair competition"; members must not use environmental protection measures as a means of disguising protectionist policies.[48]
provisions permitting intervention in trade for economic reasons.[47]
Exceptions to the MFN principle also allow for preferential treatment of developing countries, regional free trade areas and customs unions.[6]:fol.93
Organizational structure
The General Council has the following subsidiary bodies which oversee committees in different areas:
Council for Trade in Goods
There are 11 committees under the jurisdiction of the Goods Council each with a specific task. All members of the WTO participate in the committees. The Textiles Monitoring Body is separate from the other committees but still under the jurisdiction of Goods Council. The body has its own chairman and only 10 members. The body also has several groups relating to textiles.[49]
Council for Trade-Related Aspects of Intellectual Property Rights
Information on intellectual property in the WTO, news and official records of the activities of the TRIPS Council, and details of the WTO's work with other international organizations in the field.[50]
Council for Trade in Services
The Council for Trade in Services operates under the guidance of the General Council and is responsible for overseeing the functioning of the General Agreement on Trade in Services (GATS). It is open to all WTO members, and can create subsidiary bodies as required.[51]
Trade Negotiations Committee
The Trade Negotiations Committee (TNC) is the committee that deals with the current trade talks round. The chair is WTO's director-general. As of June 2012 the committee was tasked with the Doha Development Round.[52]
The Service Council has three subsidiary bodies: financial services, domestic regulations, GATS rules and specific commitments.[49] The council has several different committees, working groups, and working parties.[53] There are committees on the following: Trade and Environment; Trade and Development (Subcommittee on Least-Developed Countries); Regional Trade Agreements; Balance of Payments Restrictions; and Budget, Finance and Administration. There are working parties on the following: Accession. There are working groups on the following: Trade, debt and finance; and Trade and technology transfer.
Decision-making
The WTO describes itself as "a rules-based, member-driven organization — all decisions are made by the member governments, and the rules are the outcome of negotiations among members".[54] The WTO Agreement foresees votes where consensus cannot be reached, but the practice of consensus dominates the process of decision-making.[55]
Richard Harold Steinberg (2002) argues that although the WTO's consensus governance model provides law-based initial bargaining, trading rounds close through power-based bargaining favouring Europe and the U.S., and may not lead to Pareto improvement.[56]
Dispute settlement
Main article: Dispute settlement in the WTO
In 1994, the WTO members agreed on the Understanding on Rules and Procedures Governing the Settlement of Disputes (DSU) annexed to the "Final Act" signed in Marrakesh in 1994.[57] Dispute settlement is regarded by the WTO as the central pillar of the multilateral trading system, and as a "unique contribution to the stability of the global economy".[58] WTO members have agreed that, if they believe fellow-members are violating trade rules, they will use the multilateral system of settling disputes instead of taking action unilaterally.[59]
The operation of the WTO dispute settlement process involves the DSB panels, the Appellate Body, the WTO Secretariat, arbitrators, independent experts and several specialized institutions.[60] Bodies involved in the dispute settlement process, World Trade Organization.
Accession and membership
Main article: World Trade Organization accession and membership
The process of becoming a WTO member is unique to each applicant country, and the terms of accession are dependent upon the country's stage of economic development and current trade regime.[61] The process takes about five years, on average, but it can last more if the country is less than fully committed to the process or if political issues interfere. The shortest accession negotiation was that of the Kyrgyz Republic, while the longest was that of Russia, which, having first applied to join GATT in 1993, was approved for membership in December 2011 and became a WTO member on 22 August 2012.[62] The second longest was that of Vanuatu, whose Working Party on the Accession of Vanuatu was established on 11 July 1995. After a final meeting of the Working Party in October 2001, Vanuatu requested more time to consider its accession terms. In 2008, it indicated its interest to resume and conclude its WTO accession. The Working Party on the Accession of Vanuatu was reconvened informally on 4 April 2011 to discuss Vanuatu's future WTO membership. The re-convened Working Party completed its mandate on 2 May 2011. The General Council formally approved the Accession Package of Vanuatu on 26 October 2011. On 24 August 2012, the WTO welcomed Vanuatu as its 157th member.[63] An offer of accession is only given once consensus is reached among interested parties.[64]
Accession process
WTO accession progress:
Members (including dual-representation with the European Union)
Draft Working Party Report or Factual Summary adopted
Goods and/or Services offers submitted
Memorandum on Foreign Trade Regime (FTR) submitted
Observer, negotiations to start later or no Memorandum on FTR submitted
Frozen procedures or no negotiations in the last 3 years
No official interaction with the WTO
A country wishing to accede to the WTO submits an application to the General Council, and has to describe all aspects of its trade and economic policies that have a bearing on WTO agreements.[65] The application is submitted to the WTO in a memorandum which is examined by a working party open to all interested WTO Members.[66]
After all necessary background information has been acquired, the working party focuses on issues of discrepancy between the WTO rules and the applicant's international and domestic trade policies and laws. The working party determines the terms and conditions of entry into the WTO for the applicant nation, and may consider transitional periods to allow countries some leeway in complying with the WTO rules.[61]
The final phase of accession involves bilateral negotiations between the applicant nation and other working party members regarding the concessions and commitments on tariff levels and market access for goods and services. The new member's commitments are to apply equally to all WTO members under normal non-discrimination rules, even though they are negotiated bilaterally.[65]
When the bilateral talks conclude, the working party sends to the general council or ministerial conference an accession package, which includes a summary of all the working party meetings, the Protocol of Accession (a draft membership treaty), and lists ("schedules") of the member-to-be's commitments. Once the general council or ministerial conference approves of the terms of accession, the applicant's parliament must ratify the Protocol of Accession before it can become a member.[67] Some countries have faced a tougher and much longer accession process due to challenges during negotiations with other WTO members; Vietnam's negotiations, for example, took more than 11 years before it became an official member in January 2007.[68]
Members and observers
The WTO has 160 members and 24 observer governments.[69] In addition to states, the European Union is a member. WTO members do not have to be full sovereign nation-members. Instead, they must be a customs territory with full autonomy in the conduct of their external commercial relations. Thus Hong Kong has been a member since 1995 (as "Hong Kong, China" since 1997) predating the People's Republic of China, which joined in 2001 after 15 years of negotiations. The Republic of China (Taiwan) acceded to the WTO in 2002 as "Separate Customs Territory of Taiwan, Penghu, Kinmen and Matsu" (Chinese Taipei) despite its disputed status.[70] The WTO Secretariat omits the official titles (such as Counselor, First Secretary, Second Secretary and Third Secretary) of the members of Chinese Taipei's Permanent Mission to the WTO, except for the titles of the Permanent Representative and the Deputy Permanent Representative.[71]
As of 2007, WTO member states represented 96.4% of global trade and 96.7% of global GDP.[72] Iran, followed by Algeria, are the economies with the largest GDP and trade outside the WTO, using 2005 data.[73][74] With the exception of the Holy See, observers must start accession negotiations within five years of becoming observers. A number of international intergovernmental organizations have also been granted observer status to WTO bodies.[75] 14 UN member states have no official affiliation with the WTO.
Agreements
Further information: Uruguay Round
The WTO oversees about 60 different agreements which have the status of international legal texts. Member countries must sign and ratify all WTO agreements on accession.[76] A discussion of some of the most important agreements follows. The Agreement on Agriculture came into effect with the establishment of the WTO at the beginning of 1995. The AoA has three central concepts, or "pillars": domestic support, market access and export subsidies. The General Agreement on Trade in Services was created to extend the multilateral trading system to service sector, in the same way as the General Agreement on Tariffs and Trade (GATT) provided such a system for merchandise trade. The agreement entered into force in January 1995. The Agreement on Trade-Related Aspects of Intellectual Property Rights sets down minimum standards for many forms of intellectual property (IP) regulation. It was negotiated at the end of the Uruguay Round of the General Agreement on Tariffs and Trade (GATT) in 1994.[77]
The Agreement on the Application of Sanitary and Phytosanitary Measures—also known as the SPS Agreement—was negotiated during the Uruguay Round of GATT, and entered into force with the establishment of the WTO at the beginning of 1995. Under the SPS agreement, the WTO sets constraints on members' policies relating to food safety (bacterial contaminants, pesticides, inspection and labelling) as well as animal and plant health (imported pests and diseases). The Agreement on Technical Barriers to Trade is an international treaty of the World Trade Organization. It was negotiated during the Uruguay Round of the General Agreement on Tariffs and Trade, and entered into force with the establishment of the WTO at the end of 1994. Its object is to ensure that technical negotiations and standards, as well as testing and certification procedures, do not create unnecessary obstacles to trade.[78] The Agreement on Customs Valuation, formally known as the Agreement on Implementation of Article VII of GATT, prescribes methods of customs valuation that Members are to follow. Chiefly, it adopts the "transaction value" approach.
In December 2013, the biggest agreement within the WTO was signed and known as the Bali Package.[79]
Office of director-general
The headquarters of the World Trade Organization, in Geneva, Switzerland.
The procedures for the appointment of the WTO director-general were published in January 2003.[80] Additionally, there are four deputy directors-general. As of 1 October 2013, under director-general Roberto Azevêdo, the four deputy directors-general are Yi Xiaozhun of China, Karl-Ernst Brauner of Germany, Yonov Frederick Agah of Nigeria and David Shark of the United States.[81]
List of directors-general
Source: Official website[82]
Brazil Roberto Azevedo, 2013–
France Pascal Lamy, 2005–2013
Thailand Supachai Panitchpakdi, 2002–2005
New Zealand Mike Moore, 1999–2002
Italy Renato Ruggiero, 1995–1999
Republic of Ireland Peter Sutherland, 1995
(Heads of the precursor organization, GATT):
Republic of Ireland Peter Sutherland, 1993–1995
Switzerland Arthur Dunkel, 1980–1993
Switzerland Olivier Long, 1968–1980
United Kingdom Eric Wyndham White, 1948–1968
See also
Agreement on Trade Related Investment Measures (TRIMS)
Agreement on Trade-Related Aspects of Intellectual Property Rights (TRIPS)
Aide-mémoire non-paper
Anti-globalization movement
Criticism of the World Trade Organization
Foreign Affiliate Trade Statistics
Global administrative law
Globality
Information Technology Agreement
International Trade Centre
Labour Standards in the World Trade Organisation
List of member states of the World Trade Organization
North American Free Trade Agreement (NAFTA)
Subsidy
Swiss Formula
Trade bloc
Washington Consensus
World Trade Report
World Trade Organization Ministerial Conference of 1999 protest activity
China and the World Trade Organization
Notes and references
Jump up ^ Members and Observers at WTO official website
Jump up ^ Languages, Documentation and Information Management Division at WTO official site
Jump up ^ "WTO Secretariat budget for 2011". WTO official site. Retrieved 25 August 2008.
Jump up ^ Understanding the WTO: What We Stand For_ Fact File
Jump up ^ World Trade Organization - UNDERSTANDING THE WTO: BASICS
^ Jump up to: a b Understanding the WTO Handbook at WTO official website. (Note that the document's printed folio numbers do not match the pdf page numbers.)
Jump up ^ Malanczuk, P. (1999). "International Organisations and Space Law: World Trade Organization". Encyclopaedia Britannica 442. p. 305. Bibcode:1999ESASP.442..305M.
Jump up ^ Understanding the WTO: The Doha Agenda
Jump up ^ The Challenges to the World Trade Organization: It’s All About Legitimacy THE BROOKINGS INSTITUTION, Policy Paper 2011-04
Jump up ^ GROUPS IN THE WTO Updated 1 July 2013
Jump up ^ Bourcier, Nicolas (21 May 2013). "Roberto Azevedo's WTO appointment gives Brazil a seat at the top table". Guardian Weekly. Retrieved 2 September 2013.
Jump up ^ "Roberto Azevêdo takes over". WTO official website. 1 September 2013. Retrieved 2 September 2013.
Jump up ^ "Overview of the WTO Secretariat". WTO official website. Retrieved 2 September 2013.
Jump up ^ Ninth WTO Ministerial Conference | WTO - MC9
Jump up ^ BBC News - WTO agrees global trade deal worth $1tn
Jump up ^ A.E. Eckes Jr., US Trade History, 73
* A. Smithies, Reflections on the Work of Keynes, 578–601
* N. Warren, Internet and Globalization, 193
Jump up ^ P. van den Bossche, The Law and Policy of the World Trade Organization, 80
Jump up ^ Palmeter-Mavroidis, Dispute Settlement, 2
Jump up ^ Fergusson, Ian F. (9 May 2007). "The World Trade Organization: Background and Issues" (PDF). Congressional Research Service. p. 4. Retrieved 15 August 2008.
Jump up ^ It was contemplated that the GATT would be applied for several years until the ITO came into force. However, since the ITO was never brought into being, the GATT gradually became the focus for international governmental cooperation on trade matters with economist Nicholas Halford overseeing the implementation of GATT in members policies. (P. van den Bossche, The Law and Policy of the World Trade Organization, 81; J.H. Jackson, Managing the Trading System, 134).
^ Jump up to: a b The GATT Years: from Havana to Marrakesh, WTO official site
Jump up ^ Footer, M. E. Analysis of the World Trade Organization, 17
Jump up ^ B.S. Klapper, With a "Short Window"
Jump up ^ Lula, Time to Get Serious about Agricultural Subsidies
^ Jump up to: a b c P. Gallagher, The First Ten Years of the WTO, 4
^ Jump up to: a b The Uruguay Round, WTO official site
Jump up ^ "Legal texts – Marrakesh agreement". WTO. Retrieved 30 May 2010.
Jump up ^ Overview: a Navigational Guide, WTO official site. For the complete list of "The Uruguay Round Agreements", see WTO legal texts, WTO official site, and Uruguay Round Agreements, Understandings, Decisions and Declarations, WorldTradeLaw.net
^ Jump up to: a b c d Principles of the Trading System, WTO official site
Jump up ^ "Five Years of China WTO Membership. EU and US Perspectives about China's Compliance with Transparency Commitments and the Transitional Review Mechanism". Papers.ssrn.com. Retrieved 30 May 2010.
Jump up ^ WTO to hold 7th Ministerial Conference on 30 November-2 December 2009 WTO official website
Jump up ^ "In the twilight of Doha". The Economist (The Economist): 65. 27 July 2006.
^ Jump up to: a b European Commission The Doha Round
Jump up ^ Fergusson, Ian F. (18 January 2008). "World Trade Organization Negotiations: The Doha Development Agenda" (PDF). Congressional Research Service. Retrieved 13 April 2012. Page 9 (folio CRS-6)
Jump up ^ WTO trade negotiations: Doha Development Agenda Europa press release, 31 October 2011
Jump up ^ "Members start negotiating proposal on poor countries’ food stockholding". WTO official website. 27 March 2013. Retrieved 2 September 2013.
Jump up ^ a)The GATT years: from Havana to Marrakesh, World Trade Organization
b)Timeline: World Trade Organization – A chronology of key events, BBC News
c)Brakman-Garretsen-Marrewijk-Witteloostuijn, Nations and Firms in the Global Economy, Chapter 10: Trade and Capital Restriction
Jump up ^ Functions of the WTO, IISD
^ Jump up to: a b Main Functions, WTO official site
^ Jump up to: a b A Bredimas, International Economic Law, II, 17
^ Jump up to: a b C. Deere, Decision-making in the WTO: Medieval or Up-to-Date?
Jump up ^ WTO Assistance for Developing Countries[dead link], WTO official site
Jump up ^ Sinha, Aparijita. [1]. "What are the functions and objectives of the WTO?". Retrieved on 13 April, 2014.
Jump up ^ Economic research and analysis, WTO official site
^ Jump up to: a b c B. Hoekman, The WTO: Functions and Basic Principles, 42
^ Jump up to: a b B. Hoekman, The WTO: Functions and Basic Principles, 43
^ Jump up to: a b B. Hoekman, The WTO: Functions and Basic Principles, 44
^ Jump up to: a b Understanding the WTO: What we stand for
^ Jump up to: a b "Fourth level: down to the nitty-gritty". WTO official site. Retrieved 18 August 2008.
Jump up ^ "Intellectual property – overview of TRIPS Agreement". Wto.org. 15 April 1994. Retrieved 30 May 2010.
Jump up ^ "The Services Council, its Committees and other subsidiary bodies". WTO official site. Retrieved 14 August 2008.
Jump up ^ "The Trade Negotiations Committee". WTO official site. Retrieved 14 August 2008.
Jump up ^ "WTO organization chart". WTO official site. Retrieved 14 August 2008.
Jump up ^ Decision-making at WTO official site
Jump up ^ Decision-Making in the World Trade Organization Abstract from Journal of International Economic Law at Oxford Journals
Jump up ^ Steinberg, Richard H. "In the Shadow of Law or Power? Consensus-based Bargaining and Outcomes in the GATT/WTO." International Organization. Spring 2002. pp. 339–374.
Jump up ^ Stewart-Dawyer, The WTO Dispute Settlement System, 7
Jump up ^ S. Panitchpakdi, The WTO at ten, 8.
Jump up ^ Settling Disputes:a Unique Contribution, WTO official site
Jump up ^ "Disputes – Dispute Settlement CBT – WTO Bodies involved in the dispute settlement process – The Dispute Settlement Body (DSB) – Page 1". WTO. 25 July 1996. Retrieved 21 May 2011.
^ Jump up to: a b Accessions Summary, Center for International Development
Jump up ^ Ministerial Conference approves Russia's WTO membership WTO News Item, 16 December 2011
Jump up ^ Accession status: Vanuatu. WTO. Retrieved on 12 July 2013.
Jump up ^ C. Michalopoulos, WTO Accession, 64
^ Jump up to: a b Membership, Alliances and Bureaucracy, WTO official site
Jump up ^ C. Michalopoulos, WTO Accession, 62–63
Jump up ^ How to Become a Member of the WTO, WTO official site
Jump up ^ Napier, Nancy K.; Vuong, Quan Hoang (2013). What we see, why we worry, why we hope: Vietnam going forward. Boise, ID, USA: Boise State University CCI Press. p. 140. ISBN 978-0985530587.
Jump up ^ "Members and Observers". World Trade Organization. 24 August 2012.
Jump up ^ Jackson, J. H. Sovereignty, 109
Jump up ^ ROC Government Publication
Jump up ^ "Accession in perspective". World Trade Organization. Retrieved 22 December 2013.
Jump up ^ "ANNEX 1. STATISTICAL SURVEY". World Trade Organization. 2005. Retrieved 22 December 2013.
Jump up ^ Arjomandy, Danial (21 November 2013). "Iranian Membership in the World Trade Organization: An Unclear Future". Iranian Studies. Retrieved 22 December 2013.
Jump up ^ International intergovernmental organizations granted observer status to WTO bodies at WTO official website
Jump up ^ "Legal texts – the WTO agreements". WTO. Retrieved 30 May 2010.
Jump up ^ Understanding the WTO - Intellectual property: protection and enforcement. WTO. Retrieved on 29 July 2013.
Jump up ^ "A Summary of the Final Act of the Uruguay Round". Wto.org. Retrieved 30 May 2010.
Jump up ^ Zarocostas, John (7 December 2013). "Global Trade Deal Reached". WWD. Retrieved 8 December 2013.
Jump up ^ "WT/L/509". WTO. Retrieved 18 February 2013.
Jump up ^ "Director-General Elect Azevêdo announces his four Deputy Directors-General". 17 August 2013. Retrieved 2 September 2013.
Jump up ^ "Previous GATT and WTO Directors-General". WTO. Retrieved 21 May 2011.
External links
Wikiquote has quotations related to: World Trade Organization
Wikimedia Commons has media related to World Trade Organization.
Official pages
Official WTO homepage
WTO 10th Anniversary PDF (1.40 MB) — Highlights of the first decade, Annual Report 2005 pages 116–166
Glossary of terms—a guide to 'WTO-speak'
International Trade Centre — joint UN/WTO agency
Government pages on the WTO
European Union position on the WTO
Media pages on the WTO
World Trade Organization
BBC News — Profile: WTO
Guardian Unlimited — Special Report: The World Trade Organisation ongoing coverage
Non-governmental organization pages on the WTO
Gatt.org — Parody of official WTO page by The Yes Men
Public Citizen
Transnational Institute: Beyond the WTO
[show] v t e
World Trade Organization
[show] v t e
International trade
[show] v t e
International organizations
Authority control
WorldCat VIAF: 149937768 LCCN: no94018277 ISNI: 0000 0001 2296 2735 GND: 2145784-0 SELIBR: 135910 ULAN: 500292980 NDL: 00577475 NKC: kn20010711437 BNE: XX4574846
Categories: World Trade OrganizationInternational tradeInternational trade organizationsOrganisations based in GenevaOrganizations established in 1995World government
Navigation menu
Create accountLog inArticleTalkReadView sourceView history
Main page
Contents
Featured content
Current events
Random article
Donate to Wikipedia
Wikimedia Shop
Interaction
Help
About Wikipedia
Community portal
Recent changes
Contact page
Tools
What links here
Related changes
Upload file
Special pages
Permanent link
Page information
Wikidata item
Cite this page
Print/export
Create a book
Download as PDF
Printable version
Languages
Afrikaans
العربية
Aragonés
Asturianu
Azərbaycanca
বাংলা
Bân-lâm-gú
Беларуская
Беларуская (тарашкевіца)
Български
Bosanski
Brezhoneg
Català
Čeština
Cymraeg
Dansk
Deutsch
Eesti
Ελληνικά
Español
Esperanto
Euskara
فارسی
Fiji Hindi
Føroyskt
Français
Frysk
Galego
ગુજરાતી
客家語/Hak-kâ-ngî
한국어
Հայերեն
हिन्दी
Hrvatski
Ido
Ilokano
Bahasa Indonesia
Íslenska
Italiano
עברית
Basa Jawa
ಕನ್ನಡ
Къарачай-малкъар
ქართული
Қазақша
Kiswahili
Latina
Latviešu
Lietuvių
Magyar
Македонски
മലയാളം
मराठी
مصرى
Bahasa Melayu
Baso Minangkabau
မြန်မာဘာသာ
Nederlands
नेपाली
नेपाल भाषा
日本語
Нохчийн
Norsk bokmål
Norsk nynorsk
Occitan
Oʻzbekcha
ਪੰਜਾਬੀ
پنجابی
پښتو
ភាសាខ្មែរ
Piemontèis
Polski
Português
Română
Русиньскый
Русский
Саха тыла
Shqip
සිංහල
Simple English
Slovenčina
Slovenščina
کوردی
Српски / srpski
Srpskohrvatski / српскохрватски
Suomi
Svenska
Tagalog
தமிழ்
Татарча/tatarça
తెలుగు
ไทย
Тоҷикӣ
Türkçe
Türkmençe
Українська
اردو
ئۇيغۇرچە / Uyghurche
Tiếng Việt
Winaray
ייִדיש
Yorùbá
粵語
Žemaitėška
中文
Edit links
This page was last modified on 22 November 2014 at 14:33.
Text is available under the Creative Commons Attribution-ShareAlike License; additional terms may apply. By using this site, you agree to the Terms of Use and Privacy Policy. Wikipedia® is a registered trademark of the Wikimedia Foundation, Inc., a non-profit organization.
Privacy policyAbout WikipediaDisclaimersContact WikipediaDevelopersMobile viewWikimedia Foundation Powered by MediaWiki
================================================
FILE: search/facet/facet_builder_datetime.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"reflect"
"sort"
"time"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// Static (shallow) sizes in bytes of DateTimeFacetBuilder and dateTimeRange.
// Both are filled in by init via reflection and read by Size.
var reflectStaticSizeDateTimeFacetBuilder, reflectStaticSizedateTimeRange int
// init records the reflect-reported struct sizes of DateTimeFacetBuilder and
// dateTimeRange once at package load so Size does not need to use reflection.
func init() {
	reflectStaticSizeDateTimeFacetBuilder = int(reflect.TypeOf(DateTimeFacetBuilder{}).Size())
	reflectStaticSizedateTimeRange = int(reflect.TypeOf(dateTimeRange{}).Size())
}
// dateTimeRange is one named bucket of a date/time facet. A zero start or end
// is treated as "unbounded" on that side when values are matched.
type dateTimeRange struct {
	start, end time.Time
}
// DateTimeFacetBuilder counts how many visited date/time values fall into
// each of a set of named ranges and turns those counts into a facet result.
type DateTimeFacetBuilder struct {
	// size caps the number of ranges returned by Result.
	size int
	// field is the name reported by Field and in the result.
	field string
	// termsCount maps a range name to the number of values matched so far.
	termsCount map[string]int
	// total counts every range match; missing counts documents for which
	// UpdateVisitor was never called between StartDoc and EndDoc.
	total, missing int
	// ranges maps a range name to its bounds.
	ranges map[string]*dateTimeRange
	// sawValue is cleared by StartDoc and set by UpdateVisitor.
	sawValue bool
}
// NewDateTimeFacetBuilder returns a builder for the given field with empty
// count and range tables. size limits how many ranges Result will return.
func NewDateTimeFacetBuilder(field string, size int) *DateTimeFacetBuilder {
	fb := new(DateTimeFacetBuilder)
	fb.size = size
	fb.field = field
	fb.termsCount = map[string]int{}
	fb.ranges = map[string]*dateTimeRange{}
	return fb
}
// Size returns an estimate, in bytes, of the memory used by the builder: the
// static struct size, the field name, and every entry of the count and range
// maps (key bytes plus a fixed per-entry overhead).
func (fb *DateTimeFacetBuilder) Size() int {
	total := reflectStaticSizeDateTimeFacetBuilder + size.SizeOfPtr + len(fb.field)

	// Count entries: fixed overhead per entry, then the key bytes.
	total += len(fb.termsCount) * (size.SizeOfString + size.SizeOfInt)
	for name := range fb.termsCount {
		total += len(name)
	}

	// Range entries: fixed overhead (including the pointed-to range struct)
	// per entry, then the key bytes.
	total += len(fb.ranges) * (size.SizeOfString + size.SizeOfPtr + reflectStaticSizedateTimeRange)
	for name := range fb.ranges {
		total += len(name)
	}

	return total
}
// AddRange registers a range under name, replacing any range previously
// stored under the same name. When values are matched, start is inclusive,
// end is exclusive, and a zero time leaves that side unbounded.
func (fb *DateTimeFacetBuilder) AddRange(name string, start, end time.Time) {
	fb.ranges[name] = &dateTimeRange{start: start, end: end}
}
// Field returns the field name this builder was created with.
func (fb *DateTimeFacetBuilder) Field() string {
return fb.field
}
// UpdateVisitor records one prefix-coded term. Any call marks the current
// document as having a value. Only terms whose Shift() is 0 and that decode
// to an int64 are counted; the integer is interpreted as nanoseconds since
// the Unix epoch and every registered range containing that instant has its
// count (and the overall total) incremented.
func (fb *DateTimeFacetBuilder) UpdateVisitor(term []byte) {
	fb.sawValue = true

	coded := numeric.PrefixCoded(term)
	if shift, err := coded.Shift(); err != nil || shift != 0 {
		return
	}
	nanos, err := coded.Int64()
	if err != nil {
		return
	}

	when := time.Unix(0, nanos)
	for name, r := range fb.ranges {
		// start is inclusive, end is exclusive; a zero bound is open.
		fromStart := r.start.IsZero() || when.After(r.start) || when.Equal(r.start)
		beforeEnd := r.end.IsZero() || when.Before(r.end)
		if fromStart && beforeEnd {
			fb.termsCount[name]++
			fb.total++
		}
	}
}
// StartDoc clears the sawValue flag so that EndDoc can tell whether
// UpdateVisitor was called in between.
func (fb *DateTimeFacetBuilder) StartDoc() {
fb.sawValue = false
}
// EndDoc bumps the missing counter when UpdateVisitor was not called since
// the last StartDoc.
func (fb *DateTimeFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}
// Result assembles the facet result from the counts gathered so far. One
// DateRangeFacet is produced per range that matched at least one value, with
// non-zero bounds rendered as RFC 3339 (nanosecond) strings. The facets are
// sorted with sort.Sort, truncated to at most fb.size entries, and the
// matches belonging to the dropped entries are reported as Other.
//
// NOTE(review): a negative size would make the truncating slice expression
// panic — confirm that callers never pass one.
func (fb *DateTimeFacetBuilder) Result() *search.FacetResult {
	result := &search.FacetResult{
		Field:      fb.field,
		Total:      fb.total,
		Missing:    fb.missing,
		DateRanges: make([]*search.DateRangeFacet, 0, len(fb.termsCount)),
	}

	for name, hits := range fb.termsCount {
		bounds := fb.ranges[name]
		facet := &search.DateRangeFacet{Name: name, Count: hits}
		if !bounds.start.IsZero() {
			formatted := bounds.start.Format(time.RFC3339Nano)
			facet.Start = &formatted
		}
		if !bounds.end.IsZero() {
			formatted := bounds.end.Format(time.RFC3339Nano)
			facet.End = &formatted
		}
		result.DateRanges = append(result.DateRanges, facet)
	}

	sort.Sort(result.DateRanges)

	// Keep only the first fb.size facets.
	if len(result.DateRanges) > fb.size {
		result.DateRanges = result.DateRanges[:fb.size]
	}

	// Whatever was counted but not kept is reported as Other.
	kept := 0
	for _, facet := range result.DateRanges {
		kept += facet.Count
	}
	result.Other = fb.total - kept

	return result
}
================================================
FILE: search/facet/facet_builder_numeric.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// Static (shallow) sizes in bytes of NumericFacetBuilder and numericRange.
// Both are filled in by init via reflection and read by Size.
var reflectStaticSizeNumericFacetBuilder, reflectStaticSizenumericRange int
// init records the reflect-reported struct sizes of NumericFacetBuilder and
// numericRange once at package load so Size does not need to use reflection.
func init() {
	reflectStaticSizeNumericFacetBuilder = int(reflect.TypeOf(NumericFacetBuilder{}).Size())
	reflectStaticSizenumericRange = int(reflect.TypeOf(numericRange{}).Size())
}
// numericRange is one named bucket of a numeric facet. A nil min or max is
// treated as "unbounded" on that side when values are matched.
type numericRange struct {
	min, max *float64
}
// NumericFacetBuilder counts how many visited numeric values fall into each
// of a set of named ranges and turns those counts into a facet result.
type NumericFacetBuilder struct {
	// size caps the number of ranges returned by Result.
	size int
	// field is the name reported by Field and in the result.
	field string
	// termsCount maps a range name to the number of values matched so far.
	termsCount map[string]int
	// total counts every range match; missing counts documents for which
	// UpdateVisitor was never called between StartDoc and EndDoc.
	total, missing int
	// ranges maps a range name to its bounds.
	ranges map[string]*numericRange
	// sawValue is cleared by StartDoc and set by UpdateVisitor.
	sawValue bool
}
// NewNumericFacetBuilder returns a builder for the given field with empty
// count and range tables. size limits how many ranges Result will return.
func NewNumericFacetBuilder(field string, size int) *NumericFacetBuilder {
	fb := new(NumericFacetBuilder)
	fb.size = size
	fb.field = field
	fb.termsCount = map[string]int{}
	fb.ranges = map[string]*numericRange{}
	return fb
}
// Size returns an estimate, in bytes, of the memory used by the builder: the
// static struct size, the field name, and every entry of the count and range
// maps (key bytes plus a fixed per-entry overhead).
func (fb *NumericFacetBuilder) Size() int {
	total := reflectStaticSizeNumericFacetBuilder + size.SizeOfPtr + len(fb.field)

	// Count entries: fixed overhead per entry, then the key bytes.
	total += len(fb.termsCount) * (size.SizeOfString + size.SizeOfInt)
	for name := range fb.termsCount {
		total += len(name)
	}

	// Range entries: fixed overhead (including the pointed-to range struct)
	// per entry, then the key bytes.
	total += len(fb.ranges) * (size.SizeOfString + size.SizeOfPtr + reflectStaticSizenumericRange)
	for name := range fb.ranges {
		total += len(name)
	}

	return total
}
// AddRange registers a range under name, replacing any range previously
// stored under the same name. The pointers are stored as given (not copied).
// When values are matched, min is inclusive, max is exclusive, and a nil
// pointer leaves that side unbounded.
func (fb *NumericFacetBuilder) AddRange(name string, min, max *float64) {
	fb.ranges[name] = &numericRange{min: min, max: max}
}
// Field returns the field name this builder was created with.
func (fb *NumericFacetBuilder) Field() string {
return fb.field
}
// UpdateVisitor records one prefix-coded term. Any call marks the current
// document as having a value. Only terms whose Shift() is 0 and that decode
// to an int64 are counted; the integer is converted with
// numeric.Int64ToFloat64 and every registered range containing the resulting
// value has its count (and the overall total) incremented.
func (fb *NumericFacetBuilder) UpdateVisitor(term []byte) {
	fb.sawValue = true

	coded := numeric.PrefixCoded(term)
	if shift, err := coded.Shift(); err != nil || shift != 0 {
		return
	}
	raw, err := coded.Int64()
	if err != nil {
		return
	}

	value := numeric.Int64ToFloat64(raw)
	for name, r := range fb.ranges {
		// min is inclusive, max is exclusive; a nil bound is open.
		atLeastMin := r.min == nil || value >= *r.min
		belowMax := r.max == nil || value < *r.max
		if atLeastMin && belowMax {
			fb.termsCount[name]++
			fb.total++
		}
	}
}
// StartDoc clears the sawValue flag so that EndDoc can tell whether
// UpdateVisitor was called in between.
func (fb *NumericFacetBuilder) StartDoc() {
fb.sawValue = false
}
// EndDoc bumps the missing counter when UpdateVisitor was not called since
// the last StartDoc.
func (fb *NumericFacetBuilder) EndDoc() {
	if fb.sawValue {
		return
	}
	fb.missing++
}
// Result assembles the facet result from the counts gathered so far. One
// NumericRangeFacet is produced per range that matched at least one value,
// carrying the range's min/max pointers as registered. The facets are sorted
// with sort.Sort, truncated to at most fb.size entries, and the matches
// belonging to the dropped entries are reported as Other.
//
// NOTE(review): a negative size would make the truncating slice expression
// panic — confirm that callers never pass one.
func (fb *NumericFacetBuilder) Result() *search.FacetResult {
	result := &search.FacetResult{
		Field:         fb.field,
		Total:         fb.total,
		Missing:       fb.missing,
		NumericRanges: make([]*search.NumericRangeFacet, 0, len(fb.termsCount)),
	}

	for name, hits := range fb.termsCount {
		bounds := fb.ranges[name]
		result.NumericRanges = append(result.NumericRanges, &search.NumericRangeFacet{
			Name:  name,
			Count: hits,
			Min:   bounds.min,
			Max:   bounds.max,
		})
	}

	sort.Sort(result.NumericRanges)

	// Keep only the first fb.size facets.
	if len(result.NumericRanges) > fb.size {
		result.NumericRanges = result.NumericRanges[:fb.size]
	}

	// Whatever was counted but not kept is reported as Other.
	kept := 0
	for _, facet := range result.NumericRanges {
		kept += facet.Count
	}
	result.Other = fb.total - kept

	return result
}
================================================
FILE: search/facet/facet_builder_numeric_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"strconv"
"testing"
"github.com/blevesearch/bleve/v2/numeric"
)
// pcodedvalues is the fixed set of prefix-coded terms that the numeric facet
// benchmarks feed to the builder, one term per simulated document.
var pcodedvalues = []numeric.PrefixCoded{
	{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x0, 0x1},
	{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f},
	{0x20, 0x0, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7f, 0x7a, 0x1d, 0xa},
	{0x20, 0x1, 0x0, 0x0, 0x0, 0x0, 0x1, 0x16, 0x9, 0x4a, 0x7b},
}
// BenchmarkNumericFacet10 runs numericFacetN with numTerms set to 10.
func BenchmarkNumericFacet10(b *testing.B) {
numericFacetN(b, 10)
}
// BenchmarkNumericFacet100 runs numericFacetN with numTerms set to 100.
func BenchmarkNumericFacet100(b *testing.B) {
numericFacetN(b, 100)
}
func BenchmarkNumericFacet1000(b *testing.B) {
numericFacetN(b, 1000)
}
// numericFacetN benchmarks NumericFacetBuilder.Result. It registers
// numTerms+1 progressively wider ranges, replays every value of pcodedvalues
// as its own document after each registration, and then times Result alone.
func numericFacetN(b *testing.B, numTerms int) {
	field := "test"
	nfb := NewNumericFacetBuilder(field, numTerms)
	lower, upper := 0.0, 9999999998.0
	for i := 0; i <= numTerms; i++ {
		upper++
		lower--
		// AddRange takes pointers. Hand every range its own copies so that
		// widening the bounds on later iterations cannot rewrite the bounds
		// of ranges that were registered earlier.
		lo, hi := lower, upper
		nfb.AddRange("rangename"+strconv.Itoa(i), &lo, &hi)
		for _, pv := range pcodedvalues {
			nfb.StartDoc()
			nfb.UpdateVisitor(pv)
			nfb.EndDoc()
		}
	}
	// setup above is excluded from the measurement
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		nfb.Result()
	}
}
================================================
FILE: search/facet/facet_builder_terms.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"bytes"
"reflect"
"regexp"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeTermsFacetBuilder caches the in-memory size of the
// TermsFacetBuilder struct itself, as reported by reflect.
var reflectStaticSizeTermsFacetBuilder int

// init measures the struct once so Size does not need reflection for it.
func init() {
	reflectStaticSizeTermsFacetBuilder = int(reflect.TypeOf(TermsFacetBuilder{}).Size())
}
// TermsFacetBuilder counts occurrences of the terms it is shown for a single
// field, optionally restricted by a byte prefix and/or a regular expression.
type TermsFacetBuilder struct {
// size is the maximum number of terms Result keeps.
size int
// field is the field name returned by Field and copied into the result.
field string
// prefixBytes, when non-empty, limits counting to terms with this prefix.
prefixBytes []byte
// regex, when non-nil, limits counting to terms it matches.
regex *regexp.Regexp
// termsCount maps each accepted term to the number of times it was seen.
termsCount map[string]int
// total counts every term passed to UpdateVisitor, accepted or not.
total int
// missing counts documents that ended without an accepted term.
missing int
// sawValue records whether the current document produced an accepted term.
sawValue bool
}
// NewTermsFacetBuilder returns a builder for field that keeps at most size
// terms in its result. No prefix or regex filter is set initially.
func NewTermsFacetBuilder(field string, size int) *TermsFacetBuilder {
	builder := new(TermsFacetBuilder)
	builder.size = size
	builder.field = field
	builder.termsCount = map[string]int{}
	return builder
}
// Size returns an estimate, in bytes, of the memory held by the builder: the
// struct itself, the field name, the prefix, the regex (static struct size
// only) and one string header, key and int per counted term.
func (fb *TermsFacetBuilder) Size() int {
	estimate := reflectStaticSizeTermsFacetBuilder + size.SizeOfPtr
	estimate += len(fb.field)
	estimate += len(fb.prefixBytes)
	// regex pointer (the regexp.Regexp object itself is accounted for below)
	estimate += size.SizeOfPtr

	if fb.regex != nil {
		// Only the static size of the regexp.Regexp struct is added here;
		// its internal heap allocations are not included, so real usage may
		// be higher.
		estimate += int(reflect.TypeOf(fb.regex).Elem().Size())
	}

	for term := range fb.termsCount {
		estimate += size.SizeOfString + len(term) + size.SizeOfInt
	}
	return estimate
}
// Field returns the name of the field this builder was created for.
func (fb *TermsFacetBuilder) Field() string {
return fb.field
}
// SetPrefixFilter restricts counting to terms that start with prefix.
// Passing the empty string removes any previously configured prefix.
func (fb *TermsFacetBuilder) SetPrefixFilter(prefix string) {
	if prefix == "" {
		fb.prefixBytes = nil
		return
	}
	fb.prefixBytes = []byte(prefix)
}
// SetRegexFilter sets the compiled regex filter for term facets.
// The pointer is stored as given; passing nil leaves the builder without a
// regex filter.
func (fb *TermsFacetBuilder) SetRegexFilter(regex *regexp.Regexp) {
fb.regex = regex
}
// UpdateVisitor records one term for the current document. Every term bumps
// the total (needed later to derive "Other"); only terms that pass the
// configured prefix and regex filters are counted individually.
func (fb *TermsFacetBuilder) UpdateVisitor(term []byte) {
	fb.total++

	// Both checks work on the raw bytes, so rejected terms are never
	// converted to a string.
	switch {
	case len(fb.prefixBytes) > 0 && !bytes.HasPrefix(term, fb.prefixBytes):
		return
	case fb.regex != nil && !fb.regex.Match(term):
		return
	}

	key := string(term)
	fb.sawValue = true
	fb.termsCount[key]++
}
// StartDoc resets the per-document "saw an accepted term" flag.
func (fb *TermsFacetBuilder) StartDoc() {
fb.sawValue = false
}
// EndDoc counts the document as missing when no term was accepted by
// UpdateVisitor since the last StartDoc.
func (fb *TermsFacetBuilder) EndDoc() {
if !fb.sawValue {
fb.missing++
}
}
// Result builds the facet result for this builder: the counted terms ordered
// by sort.Sort and cut down to at most fb.size entries. Other is the total
// number of visited terms minus the counts of the terms that are listed.
func (fb *TermsFacetBuilder) Result() *search.FacetResult {
	facets := &search.TermFacets{}
	for term, hits := range fb.termsCount {
		facets.Add(&search.TermFacet{Term: term, Count: hits})
	}
	sort.Sort(facets)

	// keep the top fb.size terms, or all of them when there are fewer
	keep := facets.Len()
	if fb.size <= keep {
		keep = fb.size
	}
	facets.TrimToTopN(keep)

	listed := 0
	for _, facet := range facets.Terms() {
		listed += facet.Count
	}

	return &search.FacetResult{
		Field:   fb.field,
		Total:   fb.total,
		Missing: fb.missing,
		Other:   fb.total - listed,
		Terms:   facets,
	}
}
================================================
FILE: search/facet/facet_builder_terms_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package facet
import (
"os"
"regexp"
"testing"
)
// terms is the benchmark vocabulary, loaded once from benchmark_data.txt.
var terms []string

// init reads benchmark_data.txt and splits it on runs of non-word characters.
// It panics when the file cannot be read.
func init() {
	raw, err := os.ReadFile("benchmark_data.txt")
	if err != nil {
		panic(err)
	}
	terms = regexp.MustCompile(`\W+`).Split(string(raw), -1)
}
// BenchmarkTermsFacet10 runs termsFacetN with numTerms = 10.
func BenchmarkTermsFacet10(b *testing.B) {
termsFacetN(b, 10)
}
// BenchmarkTermsFacet100 runs termsFacetN with numTerms = 100.
func BenchmarkTermsFacet100(b *testing.B) {
termsFacetN(b, 100)
}
// BenchmarkTermsFacet1000 runs termsFacetN with numTerms = 1000.
func BenchmarkTermsFacet1000(b *testing.B) {
termsFacetN(b, 1000)
}
// BenchmarkTermsFacet10000 runs termsFacetN with numTerms = 10000.
func BenchmarkTermsFacet10000(b *testing.B) {
termsFacetN(b, 10000)
}
// The 100000-term variant is disabled.
// func BenchmarkTermsFacet100000(b *testing.B) {
// termsFacetN(b, 100000)
// }
func termsFacetN(b *testing.B, numTerms int) {
field := "test"
termsLen := len(terms)
tfb := NewTermsFacetBuilder(field, 3)
i := 0
for len(tfb.termsCount) < numTerms && i <= termsLen {
j := i % termsLen
term := terms[j]
tfb.StartDoc()
tfb.UpdateVisitor([]byte(term))
tfb.EndDoc()
i++
}
b.ResetTimer()
for i := 0; i < b.N; i++ {
tfb.Result()
}
}
// TestTermsFacetPrefix checks that a prefix filter keeps only the terms that
// start with "prod-", while Total still counts every visited term and Other
// accounts for the rejected ones.
func TestTermsFacetPrefix(t *testing.T) {
field := "category"
tfb := NewTermsFacetBuilder(field, 10)
tfb.SetPrefixFilter("prod-")
// Add terms with various prefixes
terms := []string{
"prod-server",
"prod-database",
"dev-server",
"dev-database",
"test-server",
"prod-cache",
}
// each term is fed as its own document
for _, term := range terms {
tfb.StartDoc()
tfb.UpdateVisitor([]byte(term))
tfb.EndDoc()
}
result := tfb.Result()
// Should only have terms with "prod-" prefix
if result.Terms.Len() != 3 {
t.Fatalf("expected 3 matching terms, got %d", result.Terms.Len())
}
// Verify the terms are correct
expectedTerms := map[string]bool{
"prod-server": true,
"prod-database": true,
"prod-cache": true,
}
for _, facet := range result.Terms.Terms() {
if !expectedTerms[facet.Term] {
t.Errorf("unexpected term in results: %s", facet.Term)
}
if facet.Count != 1 {
t.Errorf("expected count 1 for %s, got %d", facet.Term, facet.Count)
}
}
// Total should include all terms (matching + non-matching)
if result.Total != 6 {
t.Errorf("expected total 6, got %d", result.Total)
}
// Other should be 3 (the non-matching terms)
if result.Other != 3 {
t.Errorf("expected other 3, got %d", result.Other)
}
}
// TestTermsFacetRegex checks that a regex filter keeps only the terms that
// match the whole pattern, while Total still counts every visited term and
// Other accounts for the rejected ones.
func TestTermsFacetRegex(t *testing.T) {
field := "product_code"
// Match pattern: ABC-#### (3 letters, dash, 4 digits) - pattern: ^[A-Z]{3}-\\d{4}$
tfb := NewTermsFacetBuilder(field, 10)
regex, err := regexp.Compile("^[A-Z]{3}-\\d{4}$")
if err != nil {
t.Fatal(err)
}
tfb.SetRegexFilter(regex)
// Add terms with various formats
terms := []string{
"ABC-1234",
"XYZ-5678",
"ABC-999", // too few digits
"ABCD-1234", // too many letters
"ABC-ABCD", // letters instead of digits
"DEF-0000",
}
// each term is fed as its own document
for _, term := range terms {
tfb.StartDoc()
tfb.UpdateVisitor([]byte(term))
tfb.EndDoc()
}
result := tfb.Result()
// Should only have 3 terms matching the pattern
if result.Terms.Len() != 3 {
t.Fatalf("expected 3 matching terms, got %d", result.Terms.Len())
}
// Verify the terms are correct
expectedTerms := map[string]bool{
"ABC-1234": true,
"XYZ-5678": true,
"DEF-0000": true,
}
for _, facet := range result.Terms.Terms() {
if !expectedTerms[facet.Term] {
t.Errorf("unexpected term in results: %s", facet.Term)
}
if facet.Count != 1 {
t.Errorf("expected count 1 for %s, got %d", facet.Term, facet.Count)
}
}
// Total should include all terms
if result.Total != 6 {
t.Errorf("expected total 6, got %d", result.Total)
}
// Other should be 3 (the non-matching terms)
if result.Other != 3 {
t.Errorf("expected other 3, got %d", result.Other)
}
}
// TestTermsFacetPrefixAndRegex checks that a term must pass both the prefix
// filter and the regex filter to be counted, and that repeated terms
// accumulate their counts.
func TestTermsFacetPrefixAndRegex(t *testing.T) {
field := "tag"
// Both prefix "env:" and regex pattern for prod/staging only
tfb := NewTermsFacetBuilder(field, 10)
tfb.SetPrefixFilter("env:")
regex, err := regexp.Compile("^env:(prod|staging)$")
if err != nil {
t.Fatal(err)
}
tfb.SetRegexFilter(regex)
// Add various terms
terms := []string{
"env:prod",
"env:staging",
"env:dev", // has prefix but doesn't match regex
"env:test", // has prefix but doesn't match regex
"type:server", // no prefix
"env:prod", // duplicate
"env:staging", // duplicate
}
// each term is fed as its own document
for _, term := range terms {
tfb.StartDoc()
tfb.UpdateVisitor([]byte(term))
tfb.EndDoc()
}
result := tfb.Result()
// Should only have 2 unique terms (env:prod and env:staging)
if result.Terms.Len() != 2 {
t.Fatalf("expected 2 matching terms, got %d", result.Terms.Len())
}
// Verify the terms and counts
termCounts := make(map[string]int)
for _, facet := range result.Terms.Terms() {
termCounts[facet.Term] = facet.Count
}
if termCounts["env:prod"] != 2 {
t.Errorf("expected count 2 for env:prod, got %d", termCounts["env:prod"])
}
if termCounts["env:staging"] != 2 {
t.Errorf("expected count 2 for env:staging, got %d", termCounts["env:staging"])
}
// Total should be all 7 terms
if result.Total != 7 {
t.Errorf("expected total 7, got %d", result.Total)
}
// Other should be 3 (env:dev, env:test, type:server)
if result.Other != 3 {
t.Errorf("expected other 3, got %d", result.Other)
}
}
// TestTermsFacetInvalidRegex confirms that a malformed pattern (an unmatched
// bracket) is rejected by regexp.Compile.
func TestTermsFacetInvalidRegex(t *testing.T) {
	if _, compileErr := regexp.Compile("[invalid"); compileErr == nil {
		t.Fatal("expected error for invalid regex, got nil")
	}
}
// TestTermsFacetNoFilter checks the unfiltered path: with size 2 the result
// keeps the two highest-ranked terms and moves the trimmed term's count into
// Other.
func TestTermsFacetNoFilter(t *testing.T) {
field := "tag"
tfb := NewTermsFacetBuilder(field, 2)
terms := []string{"apple", "banana", "cherry", "apple"}
// each term is fed as its own document
for _, term := range terms {
tfb.StartDoc()
tfb.UpdateVisitor([]byte(term))
tfb.EndDoc()
}
result := tfb.Result()
// Should return top 2 by count
if result.Terms.Len() != 2 {
t.Fatalf("expected 2 terms, got %d", result.Terms.Len())
}
// Apple should be first with count 2
facets := result.Terms.Terms()
if facets[0].Term != "apple" || facets[0].Count != 2 {
t.Errorf("expected apple with count 2, got %s with count %d", facets[0].Term, facets[0].Count)
}
// Total should be 4
if result.Total != 4 {
t.Errorf("expected total 4, got %d", result.Total)
}
// Other should be 1 (cherry was trimmed)
if result.Other != 1 {
t.Errorf("expected other 1, got %d", result.Other)
}
}
================================================
FILE: search/facets_builder.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/size"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// Cached struct sizes, measured once with reflect and used by the Size
// estimators in this file.
var (
	reflectStaticSizeFacetsBuilder     int
	reflectStaticSizeFacetResult       int
	reflectStaticSizeTermFacet         int
	reflectStaticSizeNumericRangeFacet int
	reflectStaticSizeDateRangeFacet    int
)

// init records the static size of each facet-related struct.
func init() {
	reflectStaticSizeFacetsBuilder = int(reflect.TypeOf(FacetsBuilder{}).Size())
	reflectStaticSizeFacetResult = int(reflect.TypeOf(FacetResult{}).Size())
	reflectStaticSizeTermFacet = int(reflect.TypeOf(TermFacet{}).Size())
	reflectStaticSizeNumericRangeFacet = int(reflect.TypeOf(NumericRangeFacet{}).Size())
	reflectStaticSizeDateRangeFacet = int(reflect.TypeOf(DateRangeFacet{}).Size())
}
// FacetBuilder is implemented by every per-field facet accumulator that can
// be registered with a FacetsBuilder.
type FacetBuilder interface {
// StartDoc is invoked by FacetsBuilder.StartDoc on every registered builder.
StartDoc()
// UpdateVisitor receives one term of the builder's field; FacetsBuilder
// dispatches terms to builders by field name.
UpdateVisitor(term []byte)
// EndDoc is invoked by FacetsBuilder.EndDoc on every registered builder.
EndDoc()
// Result returns the facet result accumulated so far.
Result() *FacetResult
// Field names the document field the builder wants terms for.
Field() string
// Size returns the builder's size; FacetsBuilder.Size adds it to its own.
Size() int
}
// FacetsBuilder fans document terms out to a set of named FacetBuilders and
// collects their results.
type FacetsBuilder struct {
// indexReader is the reader supplied to NewFacetsBuilder.
indexReader index.IndexReader
// facetNames[i] is the name under which facets[i] was added.
facetNames []string
// facets holds the builders in the order they were added.
facets []FacetBuilder
// facetsByField groups the builders by the field they consume.
facetsByField map[string][]FacetBuilder
// fields lists the field of every added builder, one entry per Add call.
fields []string
}
// NewFacetsBuilder returns an empty FacetsBuilder bound to indexReader.
// Facet builders are registered afterwards through Add.
func NewFacetsBuilder(indexReader index.IndexReader) *FacetsBuilder {
	builder := new(FacetsBuilder)
	builder.indexReader = indexReader
	return builder
}
// Size returns an estimate, in bytes, of the memory held by the builder: the
// struct itself, every registered facet builder together with its name, and
// the list of field names.
func (fb *FacetsBuilder) Size() int {
	estimate := reflectStaticSizeFacetsBuilder + size.SizeOfPtr

	for i, builder := range fb.facets {
		estimate += size.SizeOfString + builder.Size() + len(fb.facetNames[i])
	}

	for _, field := range fb.fields {
		estimate += size.SizeOfString + len(field)
	}

	return estimate
}
// Add registers facetBuilder under name. The builder is remembered in
// insertion order, indexed by the field it reports, and its field is appended
// to the list returned by RequiredFields.
func (fb *FacetsBuilder) Add(name string, facetBuilder FacetBuilder) {
	if fb.facetsByField == nil {
		fb.facetsByField = make(map[string][]FacetBuilder)
	}

	fb.facetNames = append(fb.facetNames, name)
	fb.facets = append(fb.facets, facetBuilder)

	field := facetBuilder.Field()
	fb.facetsByField[field] = append(fb.facetsByField[field], facetBuilder)
	fb.fields = append(fb.fields, field)
}
// RequiredFields returns the fields of all added facet builders, in the order
// they were added. The internal slice is returned without copying, and a
// field appears once per builder that uses it.
func (fb *FacetsBuilder) RequiredFields() []string {
return fb.fields
}
// StartDoc notifies every registered facet builder that a new document is
// about to be visited.
func (fb *FacetsBuilder) StartDoc() {
	builders := fb.facets
	for i := range builders {
		builders[i].StartDoc()
	}
}
// EndDoc notifies every registered facet builder that the current document
// has been fully visited.
func (fb *FacetsBuilder) EndDoc() {
	builders := fb.facets
	for i := range builders {
		builders[i].EndDoc()
	}
}
// UpdateVisitor forwards term to every facet builder registered for field.
// Fields without a registered builder are ignored.
func (fb *FacetsBuilder) UpdateVisitor(field string, term []byte) {
	// A missing key yields a nil slice, so the loop simply does not run.
	for _, builder := range fb.facetsByField[field] {
		builder.UpdateVisitor(term)
	}
}
// TermFacet is one bucket of a terms facet: a term and how often it was
// counted.
type TermFacet struct {
	Term  string `json:"term"`
	Count int    `json:"count"`
}

// TermFacets is a list of term buckets together with a by-term index that
// lets Add merge counts for terms that are already present.
type TermFacets struct {
	termFacets []*TermFacet
	termLookup map[string]*TermFacet
}

// Terms returns the buckets in their current order. It is safe to call on a
// nil receiver, in which case an empty slice is returned.
func (tf *TermFacets) Terms() []*TermFacet {
	if tf == nil {
		return []*TermFacet{}
	}
	return tf.termFacets
}

// TrimToTopN keeps the first n buckets and discards the rest. n is clamped to
// the range [0, Len()], so an out-of-range value no longer panics. Discarded
// buckets are also removed from the by-term index; otherwise a later Add of
// the same term would increment a bucket that is no longer listed and the
// term could never reappear.
func (tf *TermFacets) TrimToTopN(n int) {
	if n < 0 {
		n = 0
	}
	if n >= len(tf.termFacets) {
		return
	}
	for _, dropped := range tf.termFacets[n:] {
		// only forget the entry when it really points at the dropped bucket
		if tf.termLookup[dropped.Term] == dropped {
			delete(tf.termLookup, dropped.Term)
		}
	}
	tf.termFacets = tf.termFacets[:n]
}

// Add merges the given buckets into the collection. A bucket whose term is
// already present adds its count to the existing bucket; any other bucket is
// appended as-is (the pointer is stored, not copied). Every argument is
// processed, including those that follow an already-present term.
func (tf *TermFacets) Add(termFacets ...*TermFacet) {
	for _, termFacet := range termFacets {
		if tf.termLookup == nil {
			tf.termLookup = map[string]*TermFacet{}
		}

		if existing, ok := tf.termLookup[termFacet.Term]; ok {
			existing.Count += termFacet.Count
			// move on to the next argument instead of abandoning the rest
			continue
		}

		// if we got here it wasn't already in the existing terms
		tf.termFacets = append(tf.termFacets, termFacet)
		tf.termLookup[termFacet.Term] = termFacet
	}
}
// Len reports the number of term buckets. A nil receiver has length zero,
// which covers a *TermFacets that was never fully initialized (see
// index_impl.go.init()).
func (tf *TermFacets) Len() int {
	if tf != nil {
		return len(tf.termFacets)
	}
	return 0
}

// Swap exchanges the buckets at positions i and j.
func (tf *TermFacets) Swap(i, j int) {
	facets := tf.termFacets
	facets[i], facets[j] = facets[j], facets[i]
}

// Less orders buckets by descending count; buckets with equal counts are
// ordered by ascending term.
func (tf *TermFacets) Less(i, j int) bool {
	left, right := tf.termFacets[i], tf.termFacets[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Term < right.Term
}
// TermFacets used to be a type alias for []*TermFacet.
// To maintain backwards compatibility, we have to implement custom
// JSON marshalling.

// MarshalJSON encodes the collection as a plain JSON array of term buckets.
func (tf *TermFacets) MarshalJSON() ([]byte, error) {
	return util.MarshalJSON(tf.termFacets)
}

// UnmarshalJSON decodes a JSON array of term buckets and feeds each decoded
// bucket through Add, one at a time.
func (tf *TermFacets) UnmarshalJSON(b []byte) error {
	decoded := []*TermFacet{}
	if err := util.UnmarshalJSON(b, &decoded); err != nil {
		return err
	}
	for i := range decoded {
		tf.Add(decoded[i])
	}
	return nil
}
// NumericRangeFacet is one bucket of a numeric range facet. Min and Max are
// optional bounds; a nil pointer means the bound is not set.
type NumericRangeFacet struct {
	Name  string   `json:"name"`
	Min   *float64 `json:"min,omitempty"`
	Max   *float64 `json:"max,omitempty"`
	Count int      `json:"count"`
}

// Same reports whether nrf and other describe the same range. Each bound
// matches when it is unset on both sides or set to equal values on both
// sides; Name and Count are not compared.
func (nrf *NumericRangeFacet) Same(other *NumericRangeFacet) bool {
	switch {
	case (nrf.Min == nil) != (other.Min == nil):
		// lower bound set on exactly one side
		return false
	case nrf.Min != nil && *nrf.Min != *other.Min:
		return false
	case (nrf.Max == nil) != (other.Max == nil):
		// upper bound set on exactly one side
		return false
	case nrf.Max != nil && *nrf.Max != *other.Max:
		return false
	}
	return true
}
// NumericRangeFacets is a sortable list of numeric range buckets.
type NumericRangeFacets []*NumericRangeFacet

// Add merges numericRangeFacet into the list and returns the resulting list.
// When a bucket with the same bounds already exists its count is increased;
// otherwise the new bucket is appended.
func (nrf NumericRangeFacets) Add(numericRangeFacet *NumericRangeFacet) NumericRangeFacets {
	for i := range nrf {
		if numericRangeFacet.Same(nrf[i]) {
			nrf[i].Count += numericRangeFacet.Count
			return nrf
		}
	}
	// no bucket with these bounds yet
	return append(nrf, numericRangeFacet)
}

// Len reports the number of buckets.
func (nrf NumericRangeFacets) Len() int {
	return len(nrf)
}

// Swap exchanges the buckets at positions i and j.
func (nrf NumericRangeFacets) Swap(i, j int) {
	nrf[j], nrf[i] = nrf[i], nrf[j]
}

// Less orders buckets by descending count; buckets with equal counts are
// ordered by ascending name.
func (nrf NumericRangeFacets) Less(i, j int) bool {
	left, right := nrf[i], nrf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Name < right.Name
}
// DateRangeFacet is one bucket of a date range facet. Start and End are
// optional bounds held as strings; a nil pointer means the bound is not set.
type DateRangeFacet struct {
	Name  string  `json:"name"`
	Start *string `json:"start,omitempty"`
	End   *string `json:"end,omitempty"`
	Count int     `json:"count"`
}

// Same reports whether drf and other describe the same range. Each bound
// matches when it is unset on both sides or set to equal strings on both
// sides; Name and Count are not compared.
func (drf *DateRangeFacet) Same(other *DateRangeFacet) bool {
	switch {
	case (drf.Start == nil) != (other.Start == nil):
		// start set on exactly one side
		return false
	case drf.Start != nil && *drf.Start != *other.Start:
		return false
	case (drf.End == nil) != (other.End == nil):
		// end set on exactly one side
		return false
	case drf.End != nil && *drf.End != *other.End:
		return false
	}
	return true
}
// DateRangeFacets is a sortable list of date range buckets.
type DateRangeFacets []*DateRangeFacet

// Add merges dateRangeFacet into the list and returns the resulting list.
// When a bucket with the same bounds already exists its count is increased;
// otherwise the new bucket is appended.
func (drf DateRangeFacets) Add(dateRangeFacet *DateRangeFacet) DateRangeFacets {
	for i := range drf {
		if dateRangeFacet.Same(drf[i]) {
			drf[i].Count += dateRangeFacet.Count
			return drf
		}
	}
	// no bucket with these bounds yet
	return append(drf, dateRangeFacet)
}

// Len reports the number of buckets.
func (drf DateRangeFacets) Len() int {
	return len(drf)
}

// Swap exchanges the buckets at positions i and j.
func (drf DateRangeFacets) Swap(i, j int) {
	drf[j], drf[i] = drf[i], drf[j]
}

// Less orders buckets by descending count; buckets with equal counts are
// ordered by ascending name.
func (drf DateRangeFacets) Less(i, j int) bool {
	left, right := drf[i], drf[j]
	if left.Count != right.Count {
		return left.Count > right.Count
	}
	return left.Name < right.Name
}
// FacetResult is the outcome of one facet: overall counters plus the buckets
// of whichever facet kind (terms, numeric ranges or date ranges) produced it.
type FacetResult struct {
// Field is the document field the facet was computed on.
Field string `json:"field"`
// Total, Missing and Other are summed field by field when results are merged.
Total int `json:"total"`
Missing int `json:"missing"`
// Other also receives the counts of buckets cut off by Fixup.
Other int `json:"other"`
// The three bucket lists below are omitted from JSON when empty.
Terms *TermFacets `json:"terms,omitempty"`
NumericRanges NumericRangeFacets `json:"numeric_ranges,omitempty"`
DateRanges DateRangeFacets `json:"date_ranges,omitempty"`
}
// Size returns an estimate, in bytes, of the memory held by the result: the
// struct itself, the field name, and one struct plus one pointer for every
// term, numeric range and date range bucket.
func (fr *FacetResult) Size() int {
	estimate := reflectStaticSizeFacetResult + size.SizeOfPtr + len(fr.Field)
	estimate += fr.Terms.Len() * (reflectStaticSizeTermFacet + size.SizeOfPtr)
	estimate += len(fr.NumericRanges) * (reflectStaticSizeNumericRangeFacet + size.SizeOfPtr)
	estimate += len(fr.DateRanges) * (reflectStaticSizeDateRangeFacet + size.SizeOfPtr)
	return estimate
}
// Merge folds other into fr. The Total, Missing and Other counters are
// summed, and for each facet kind (terms, numeric ranges, date ranges) the
// buckets of other are merged into the buckets of fr. When fr has no buckets
// of a kind yet it adopts other's list for that kind directly (the list is
// shared, not copied). A nil other is ignored.
//
// All three kinds are always considered: adopting one kind from other does
// not prevent the remaining kinds from being merged.
func (fr *FacetResult) Merge(other *FacetResult) {
	if other == nil {
		return
	}

	fr.Total += other.Total
	fr.Missing += other.Missing
	fr.Other += other.Other

	if other.Terms != nil {
		if fr.Terms == nil {
			fr.Terms = other.Terms
		} else {
			for _, term := range other.Terms.termFacets {
				fr.Terms.Add(term)
			}
		}
	}

	if other.NumericRanges != nil {
		if fr.NumericRanges == nil {
			fr.NumericRanges = other.NumericRanges
		} else {
			for _, nr := range other.NumericRanges {
				fr.NumericRanges = fr.NumericRanges.Add(nr)
			}
		}
	}

	if other.DateRanges != nil {
		if fr.DateRanges == nil {
			fr.DateRanges = other.DateRanges
		} else {
			for _, dr := range other.DateRanges {
				fr.DateRanges = fr.DateRanges.Add(dr)
			}
		}
	}
}
// Fixup sorts the buckets of the result and limits them to size entries,
// adding the counts of the removed buckets to Other. Only the first non-nil
// kind is processed, checked in the order terms, numeric ranges, date ranges.
func (fr *FacetResult) Fixup(size int) {
	switch {
	case fr.Terms != nil:
		sort.Sort(fr.Terms)
		if fr.Terms.Len() > size {
			for _, dropped := range fr.Terms.termFacets[size:] {
				fr.Other += dropped.Count
			}
			fr.Terms.termFacets = fr.Terms.termFacets[:size]
		}

	case fr.NumericRanges != nil:
		sort.Sort(fr.NumericRanges)
		if len(fr.NumericRanges) > size {
			for _, dropped := range fr.NumericRanges[size:] {
				fr.Other += dropped.Count
			}
			fr.NumericRanges = fr.NumericRanges[:size]
		}

	case fr.DateRanges != nil:
		sort.Sort(fr.DateRanges)
		if len(fr.DateRanges) > size {
			for _, dropped := range fr.DateRanges[size:] {
				fr.Other += dropped.Count
			}
			fr.DateRanges = fr.DateRanges[:size]
		}
	}
}
// FacetResults maps a facet name to its result.
type FacetResults map[string]*FacetResult

// Merge folds other into fr. Results that exist under the same name in both
// maps are merged; results that exist only in other are stored in fr as-is.
func (fr FacetResults) Merge(other FacetResults) {
	for name, incoming := range other {
		if existing, found := fr[name]; found {
			existing.Merge(incoming)
			continue
		}
		fr[name] = incoming
	}
}
// Fixup applies FacetResult.Fixup with the given size to the result stored
// under name. Unknown names are ignored.
func (fr FacetResults) Fixup(name string, size int) {
	target, found := fr[name]
	if !found {
		return
	}
	target.Fixup(size)
}
// Results asks every registered facet builder for its result and returns
// them keyed by the name each builder was added under.
func (fb *FacetsBuilder) Results() FacetResults {
	results := make(FacetResults)
	for i := range fb.facets {
		built := fb.facets[i].Result()
		results[fb.facetNames[i]] = built
	}
	return results
}
================================================
FILE: search/facets_builder_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"fmt"
"reflect"
"testing"
)
// TestTermFacetResultsMerge merges one set of term facet results into
// another, applies the requested fixups, and compares the outcome with the
// expected results. The second case starts from a result whose Terms is nil.
func TestTermFacetResultsMerge(t *testing.T) {
type testCase struct {
// Input
frs1 FacetResults // first facet results
frs2 FacetResults // second facet results (to be merged into first)
fixups map[string]int // {facetName:size} (to be applied after merge)
// Expected output
expFrs FacetResults // facet results after merge and fixup
}
tests := []*testCase{
// case 0: overlapping terms are summed, then trimmed to the top 3
func() *testCase {
rv := &testCase{}
rv.frs1 = FacetResults{
"types": &FacetResult{
Field: "type",
Total: 100,
Missing: 25,
Other: 25,
Terms: func() *TermFacets {
tfs := &TermFacets{}
tfs.Add(
&TermFacet{
Term: "blog",
Count: 25,
},
&TermFacet{
Term: "comment",
Count: 24,
},
&TermFacet{
Term: "feedback",
Count: 1,
},
)
return tfs
}(),
},
"categories": &FacetResult{
Field: "category",
Total: 97,
Missing: 22,
Other: 15,
Terms: func() *TermFacets {
tfs := &TermFacets{}
tfs.Add(
&TermFacet{
Term: "clothing",
Count: 35,
},
&TermFacet{
Term: "electronics",
Count: 25,
},
)
return tfs
}(),
},
}
rv.frs2 = FacetResults{
"types": &FacetResult{
Field: "type",
Total: 100,
Missing: 25,
Other: 25,
Terms: func() *TermFacets {
tfs := &TermFacets{}
tfs.Add(
&TermFacet{
Term: "blog",
Count: 25,
},
&TermFacet{
Term: "comment",
Count: 22,
},
&TermFacet{
Term: "flag",
Count: 3,
},
)
return tfs
}(),
},
}
rv.fixups = map[string]int{
"types": 3, // we want top 3 terms based on count
}
rv.expFrs = FacetResults{
"types": &FacetResult{
Field: "type",
Total: 200,
Missing: 50,
Other: 51,
Terms: &TermFacets{
termFacets: []*TermFacet{
{
Term: "blog",
Count: 50,
},
{
Term: "comment",
Count: 46,
},
{
Term: "flag",
Count: 3,
},
},
},
},
"categories": rv.frs1["categories"],
}
return rv
}(),
// case 1: the receiving result has no terms at all before the merge
func() *testCase {
rv := &testCase{}
rv.frs1 = FacetResults{
"facetName": &FacetResult{
Field: "docField",
Total: 0,
Missing: 0,
Other: 0,
Terms: nil,
},
}
rv.frs2 = FacetResults{
"facetName": &FacetResult{
Field: "docField",
Total: 3,
Missing: 0,
Other: 0,
Terms: &TermFacets{
termFacets: []*TermFacet{
{
Term: "firstTerm",
Count: 1,
},
{
Term: "secondTerm",
Count: 2,
},
},
},
},
}
rv.fixups = map[string]int{
"facetName": 1,
}
rv.expFrs = FacetResults{
"facetName": &FacetResult{
Field: "docField",
Total: 3,
Missing: 0,
Other: 1,
Terms: &TermFacets{
termFacets: []*TermFacet{
{
Term: "secondTerm",
Count: 2,
},
},
},
},
}
return rv
}(),
}
for tcIdx, tc := range tests {
t.Run(fmt.Sprintf("T#%d", tcIdx), func(t *testing.T) {
tc.frs1.Merge(tc.frs2)
for facetName, size := range tc.fixups {
tc.frs1.Fixup(facetName, size)
}
// clear termLookup, so we can compare the facet results
for _, fr := range tc.frs1 {
if fr.Terms != nil {
fr.Terms.termLookup = nil
}
}
if !reflect.DeepEqual(tc.frs1, tc.expFrs) {
t.Errorf("expected %v, got %v", tc.expFrs, tc.frs1)
}
})
}
}
// TestNumericFacetResultsMerge merges two numeric range facet results whose
// bounds are equal in value but stored behind different pointers, trims the
// merged result to three ranges, and compares it with the expected result.
func TestNumericFacetResultsMerge(t *testing.T) {
lowmed := 3.0
medhi := 6.0
hihigher := 9.0
// why second copy? the pointers may be different, but values the same
lowmed2 := 3.0
medhi2 := 6.0
hihigher2 := 9.0
fr1 := &FacetResult{
Field: "rating",
Total: 100,
Missing: 25,
Other: 25,
NumericRanges: []*NumericRangeFacet{
{
Name: "low",
Max: &lowmed,
Count: 25,
},
// NOTE(review): "med" has Min = medhi (6.0) and Max = lowmed (3.0) in all
// three fixtures of this test; the merge only compares the values, so the
// outcome is unaffected, but confirm whether the bounds were meant to be
// the other way round.
{
Name: "med",
Count: 24,
Max: &lowmed,
Min: &medhi,
},
{
Name: "hi",
Count: 1,
Min: &medhi,
Max: &hihigher,
},
},
}
frs1 := FacetResults{
"ratings": fr1,
}
fr2 := &FacetResult{
Field: "rating",
Total: 100,
Missing: 25,
Other: 25,
NumericRanges: []*NumericRangeFacet{
{
Name: "low",
Max: &lowmed2,
Count: 25,
},
{
Name: "med",
Max: &lowmed2,
Min: &medhi2,
Count: 22,
},
{
Name: "highest",
Min: &hihigher2,
Count: 3,
},
},
}
frs2 := FacetResults{
"ratings": fr2,
}
// "hi" (count 1) is expected to be trimmed into Other by the fixup
expectedFr := &FacetResult{
Field: "rating",
Total: 200,
Missing: 50,
Other: 51,
NumericRanges: []*NumericRangeFacet{
{
Name: "low",
Count: 50,
Max: &lowmed,
},
{
Name: "med",
Max: &lowmed,
Min: &medhi,
Count: 46,
},
{
Name: "highest",
Min: &hihigher,
Count: 3,
},
},
}
expectedFrs := FacetResults{
"ratings": expectedFr,
}
frs1.Merge(frs2)
frs1.Fixup("ratings", 3)
if !reflect.DeepEqual(frs1, expectedFrs) {
t.Errorf("expected %#v, got %#v", expectedFrs, frs1)
}
}
// TestDateFacetResultsMerge merges two date range facet results whose bounds
// are equal strings stored behind different pointers, trims the merged result
// to three ranges, and compares it with the expected result.
func TestDateFacetResultsMerge(t *testing.T) {
lowmed := "2010-01-01"
medhi := "2011-01-01"
hihigher := "2012-01-01"
// why second copy? the pointer are to strings done by date time parsing
// inside the facet generation, so comparing pointers will not work
lowmed2 := "2010-01-01"
medhi2 := "2011-01-01"
hihigher2 := "2012-01-01"
fr1 := &FacetResult{
Field: "birthday",
Total: 100,
Missing: 25,
Other: 25,
DateRanges: []*DateRangeFacet{
{
Name: "low",
End: &lowmed,
Count: 25,
},
{
Name: "med",
Count: 24,
Start: &lowmed,
End: &medhi,
},
{
Name: "hi",
Count: 1,
Start: &medhi,
End: &hihigher,
},
},
}
frs1 := FacetResults{
"birthdays": fr1,
}
fr2 := &FacetResult{
Field: "birthday",
Total: 100,
Missing: 25,
Other: 25,
DateRanges: []*DateRangeFacet{
{
Name: "low",
End: &lowmed2,
Count: 25,
},
{
Name: "med",
Start: &lowmed2,
End: &medhi2,
Count: 22,
},
{
Name: "highest",
Start: &hihigher2,
Count: 3,
},
},
}
frs2 := FacetResults{
"birthdays": fr2,
}
// "hi" (count 1) is expected to be trimmed into Other by the fixup
expectedFr := &FacetResult{
Field: "birthday",
Total: 200,
Missing: 50,
Other: 51,
DateRanges: []*DateRangeFacet{
{
Name: "low",
Count: 50,
End: &lowmed,
},
{
Name: "med",
Start: &lowmed,
End: &medhi,
Count: 46,
},
{
Name: "highest",
Start: &hihigher,
Count: 3,
},
},
}
expectedFrs := FacetResults{
"birthdays": expectedFr,
}
frs1.Merge(frs2)
frs1.Fixup("birthdays", 3)
if !reflect.DeepEqual(frs1, expectedFrs) {
t.Errorf("expected %#v, got %#v", expectedFrs, frs1)
}
}
================================================
FILE: search/highlight/format/ansi/ansi.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ansi
import (
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)
const (
	// Name is the identifier under which this fragment formatter is
	// registered.
	Name = "ansi"

	// DefaultAnsiHighlight is the escape sequence used when the
	// configuration does not supply a "color".
	DefaultAnsiHighlight = BgYellow
)
// FragmentFormatter highlights matched terms in a fragment by wrapping them
// in an ANSI escape sequence.
type FragmentFormatter struct {
// color is the escape sequence written before each highlighted term.
color string
}
// NewFragmentFormatter returns a formatter that prefixes every highlighted
// term with color.
func NewFragmentFormatter(color string) *FragmentFormatter {
	formatter := new(FragmentFormatter)
	formatter.color = color
	return formatter
}
// Format renders fragment f as a string in which every usable term location
// is wrapped between the formatter's color sequence and Reset. A location is
// skipped when it is nil, belongs to different array positions than the
// fragment, or starts before the text already written; the first location
// that ends beyond the fragment stops the scan.
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	var out []byte
	pos := f.Start

	for _, loc := range orderedTermLocations {
		if loc == nil || !loc.ArrayPositions.Equals(f.ArrayPositions) || loc.Start < pos {
			continue
		}
		if loc.End > f.End {
			break
		}

		// plain text between the previous term and this one
		out = append(out, f.Orig[pos:loc.Start]...)
		// color on, the term itself, color off
		out = append(out, a.color...)
		out = append(out, f.Orig[loc.Start:loc.End]...)
		out = append(out, Reset...)

		pos = loc.End
	}

	// whatever is left after the last highlighted term
	out = append(out, f.Orig[pos:f.End]...)
	return string(out)
}
// ANSI color control escape sequences.
// Shamelessly copied from https://github.com/sqp/godock/blob/master/libs/log/colors.go
const (
// text attributes; Reset is what Format writes after each highlighted term
Reset = "\x1b[0m"
Bright = "\x1b[1m"
Dim = "\x1b[2m"
Underscore = "\x1b[4m"
Blink = "\x1b[5m"
Reverse = "\x1b[7m"
Hidden = "\x1b[8m"
// foreground colors
FgBlack = "\x1b[30m"
FgRed = "\x1b[31m"
FgGreen = "\x1b[32m"
FgYellow = "\x1b[33m"
FgBlue = "\x1b[34m"
FgMagenta = "\x1b[35m"
FgCyan = "\x1b[36m"
FgWhite = "\x1b[37m"
// background colors
BgBlack = "\x1b[40m"
BgRed = "\x1b[41m"
BgGreen = "\x1b[42m"
BgYellow = "\x1b[43m"
BgBlue = "\x1b[44m"
BgMagenta = "\x1b[45m"
BgCyan = "\x1b[46m"
BgWhite = "\x1b[47m"
)
// Constructor builds an ANSI fragment formatter from config. A string value
// under the "color" key selects the highlight sequence; otherwise
// DefaultAnsiHighlight is used. It never returns an error.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
	if custom, isString := config["color"].(string); isString {
		return NewFragmentFormatter(custom), nil
	}
	return NewFragmentFormatter(DefaultAnsiHighlight), nil
}
// init registers Constructor under Name and panics when registration fails.
func init() {
	if err := registry.RegisterFragmentFormatter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/format/html/html.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"html"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// Name is the identifier under which this fragment formatter is registered.
const Name = "html"

// Default markup written before and after each highlighted term when the
// configuration does not override "before"/"after". With empty defaults the
// formatter would emit the matched text without any visible highlight.
const defaultHTMLHighlightBefore = "<mark>"
const defaultHTMLHighlightAfter = "</mark>"
// FragmentFormatter highlights matched terms in a fragment by wrapping them
// in configurable markup and HTML-escaping all text taken from the fragment.
type FragmentFormatter struct {
// before is written, unescaped, in front of each highlighted term.
before string
// after is written, unescaped, behind each highlighted term.
after string
}
// NewFragmentFormatter returns a formatter that surrounds every highlighted
// term with before and after.
func NewFragmentFormatter(before, after string) *FragmentFormatter {
	formatter := new(FragmentFormatter)
	formatter.before = before
	formatter.after = after
	return formatter
}
// Format renders fragment f as a string in which every usable term location
// is wrapped between the formatter's before and after markup. All text taken
// from the fragment is HTML-escaped; the markup itself is written verbatim.
// A location is skipped when it is nil, belongs to different array positions
// than the fragment, or starts before the text already written; the first
// location that ends beyond the fragment stops the scan.
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	var out []byte
	pos := f.Start

	for _, loc := range orderedTermLocations {
		if loc == nil || !loc.ArrayPositions.Equals(f.ArrayPositions) || loc.Start < pos {
			continue
		}
		if loc.End > f.End {
			break
		}

		// escaped text between the previous term and this one
		out = append(out, html.EscapeString(string(f.Orig[pos:loc.Start]))...)
		// opening markup, the escaped term, closing markup
		out = append(out, a.before...)
		out = append(out, html.EscapeString(string(f.Orig[loc.Start:loc.End]))...)
		out = append(out, a.after...)

		pos = loc.End
	}

	// whatever is left after the last highlighted term
	out = append(out, html.EscapeString(string(f.Orig[pos:f.End]))...)
	return string(out)
}
// Constructor builds an HTML fragment formatter from config. String values
// under the "before" and "after" keys replace the corresponding default
// markup; values of any other type are ignored. It never returns an error.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
	before, after := defaultHTMLHighlightBefore, defaultHTMLHighlightAfter

	if custom, isString := config["before"].(string); isString {
		before = custom
	}
	if custom, isString := config["after"].(string); isString {
		after = custom
	}

	return NewFragmentFormatter(before, after), nil
}
// init registers Constructor under Name and panics if registration fails.
func init() {
	if err := registry.RegisterFragmentFormatter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/format/html/html_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"testing"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// TestHTMLFragmentFormatter checks that Format wraps the matched term in the
// configured markers and HTML-escapes both the surrounding text and the term.
//
// Every fixture keeps End equal to len(Orig) and the term offsets inside the
// fragment, and every expected output is the escaped form the formatter
// produces; non-empty, distinct markers make a dropped marker detectable.
func TestHTMLFragmentFormatter(t *testing.T) {
	tests := []struct {
		fragment *highlight.Fragment
		tlm      search.TermLocationMap
		output   string
		start    string
		end      string
	}{
		{
			fragment: &highlight.Fragment{
				Orig:  []byte("the quick brown fox"),
				Start: 0,
				End:   19,
			},
			tlm: search.TermLocationMap{
				"quick": []*search.Location{
					{
						Pos:   2,
						Start: 4,
						End:   9,
					},
				},
			},
			output: "the <b>quick</b> brown fox",
			start:  "<b>",
			end:    "</b>",
		},
		{
			fragment: &highlight.Fragment{
				Orig:  []byte("the quick brown fox"),
				Start: 0,
				End:   19,
			},
			tlm: search.TermLocationMap{
				"quick": []*search.Location{
					{
						Pos:   2,
						Start: 4,
						End:   9,
					},
				},
			},
			output: "the <em>quick</em> brown fox",
			start:  "<em>",
			end:    "</em>",
		},
		// test html escaping
		{
			fragment: &highlight.Fragment{
				Orig:  []byte("<the> quick brown & fox"),
				Start: 0,
				End:   23,
			},
			tlm: search.TermLocationMap{
				"quick": []*search.Location{
					{
						Pos:   2,
						Start: 6,
						End:   11,
					},
				},
			},
			output: "&lt;the&gt; <b>quick</b> brown &amp; fox",
			start:  "<b>",
			end:    "</b>",
		},
		// test html escaping inside search term
		{
			fragment: &highlight.Fragment{
				Orig:  []byte("<the> qu&ick brown & fox"),
				Start: 0,
				End:   24,
			},
			tlm: search.TermLocationMap{
				"qu&ick": []*search.Location{
					{
						Pos:   2,
						Start: 6,
						End:   12,
					},
				},
			},
			output: "&lt;the&gt; <b>qu&amp;ick</b> brown &amp; fox",
			start:  "<b>",
			end:    "</b>",
		},
	}

	for _, test := range tests {
		emHTMLFormatter := NewFragmentFormatter(test.start, test.end)
		otl := highlight.OrderTermLocations(test.tlm)
		result := emHTMLFormatter.Format(test.fragment, otl)
		if result != test.output {
			t.Errorf("expected `%s`, got `%s`", test.output, result)
		}
	}
}
================================================
FILE: search/highlight/format/plain/plain.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package plain
import (
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)
const (
	// Name is the identifier under which this fragment formatter is
	// registered with the registry.
	Name = "plain"

	// Markers placed before and after each highlighted term when the
	// configuration does not supply its own. Both are empty strings here.
	defaultPlainHighlightBefore = ""
	defaultPlainHighlightAfter  = ""
)

// FragmentFormatter renders a fragment as unescaped text, surrounding every
// highlighted term with the configured before/after markers.
type FragmentFormatter struct {
	before, after string
}

// NewFragmentFormatter returns a FragmentFormatter that emits before ahead of
// each highlighted term and after behind it.
func NewFragmentFormatter(before, after string) *FragmentFormatter {
	ff := new(FragmentFormatter)
	ff.before = before
	ff.after = after
	return ff
}
// Format renders the bytes of f.Orig between f.Start and f.End as a string,
// copying the text unchanged and inserting the configured before/after
// markers around each highlighted term.
//
// A term location is ignored when it is nil, when its array positions differ
// from the fragment's, or when it starts before the current output position.
// Processing stops at the first term that ends beyond f.End.
func (a *FragmentFormatter) Format(f *highlight.Fragment, orderedTermLocations highlight.TermLocations) string {
	var out []byte
	cursor := f.Start
	for _, tl := range orderedTermLocations {
		switch {
		case tl == nil:
			continue
		case !tl.ArrayPositions.Equals(f.ArrayPositions):
			// location belongs to a different array element
			continue
		case tl.Start < cursor:
			// already emitted past the start of this location
			continue
		}
		if tl.End > f.End {
			break
		}
		// text preceding the term, then the term wrapped in the markers
		out = append(out, f.Orig[cursor:tl.Start]...)
		out = append(out, a.before...)
		out = append(out, f.Orig[tl.Start:tl.End]...)
		out = append(out, a.after...)
		cursor = tl.End
	}
	// whatever follows the last emitted term
	out = append(out, f.Orig[cursor:f.End]...)
	return string(out)
}
// Constructor builds a plain-text fragment formatter from config. The
// optional string entries "before" and "after" override the default markers;
// entries that are missing or not strings are ignored. The cache argument is
// unused.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.FragmentFormatter, error) {
	before, after := defaultPlainHighlightBefore, defaultPlainHighlightAfter
	if v, ok := config["before"].(string); ok {
		before = v
	}
	if v, ok := config["after"].(string); ok {
		after = v
	}
	return NewFragmentFormatter(before, after), nil
}
// init registers Constructor under Name and panics if registration fails.
func init() {
	if err := registry.RegisterFragmentFormatter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/format/plain/plain_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package plain
import (
"testing"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// TestPlainFragmentFormatter checks that Format wraps the matched term in the
// configured markers and leaves the surrounding text untouched.
//
// The cases use non-empty, distinct markers so that a formatter ignoring its
// before/after configuration fails the test.
func TestPlainFragmentFormatter(t *testing.T) {
	tests := []struct {
		fragment *highlight.Fragment
		tlm      search.TermLocationMap
		output   string
		start    string
		end      string
	}{
		{
			fragment: &highlight.Fragment{
				Orig:  []byte("the quick brown fox"),
				Start: 0,
				End:   19,
			},
			tlm: search.TermLocationMap{
				"quick": []*search.Location{
					{
						Pos:   2,
						Start: 4,
						End:   9,
					},
				},
			},
			output: "the <b>quick</b> brown fox",
			start:  "<b>",
			end:    "</b>",
		},
		{
			fragment: &highlight.Fragment{
				Orig:  []byte("the quick brown fox"),
				Start: 0,
				End:   19,
			},
			tlm: search.TermLocationMap{
				"quick": []*search.Location{
					{
						Pos:   2,
						Start: 4,
						End:   9,
					},
				},
			},
			output: "the <em>quick</em> brown fox",
			start:  "<em>",
			end:    "</em>",
		},
	}

	for _, test := range tests {
		plainFormatter := NewFragmentFormatter(test.start, test.end)
		otl := highlight.OrderTermLocations(test.tlm)
		result := plainFormatter.Format(test.fragment, otl)
		if result != test.output {
			t.Errorf("expected `%s`, got `%s`", test.output, result)
		}
	}
}
================================================
FILE: search/highlight/fragmenter/simple/simple.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"unicode/utf8"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
)
const (
	// Name is the identifier under which this fragmenter is registered with
	// the registry.
	Name = "simple"

	// defaultFragmentSize is the fragment size used when the configuration
	// does not provide one.
	defaultFragmentSize = 200
)

// Fragmenter cuts fragments of at most fragmentSize runes out of a field
// value.
type Fragmenter struct {
	fragmentSize int
}

// NewFragmenter returns a Fragmenter that produces fragments of up to
// fragmentSize runes.
func NewFragmenter(fragmentSize int) *Fragmenter {
	fr := new(Fragmenter)
	fr.fragmentSize = fragmentSize
	return fr
}
// isInvalidRune reports whether a (rune, size) pair returned by
// utf8.DecodeRune or utf8.DecodeLastRune signals a decoding failure (empty
// input or malformed bytes, size 0 or 1) rather than a well-formed U+FFFD
// character, which decodes to utf8.RuneError with a size of 3.
func isInvalidRune(r rune, size int) bool {
	return r == utf8.RuneError && size <= 1
}

// Fragment produces one candidate fragment per term location in ot, each
// spanning at most s.fragmentSize runes of orig. When ot is empty it returns
// a single fragment taken from the beginning of orig.
//
// For every term location the fragment starts at the term and grows forward;
// if runes are still available it then grows backwards, but never past the
// end of the previously processed term. Finally the window is shifted
// backwards by half of the smaller slack so the terms sit closer to the
// middle. Start and End of the returned fragments are byte offsets into orig.
//
// A term location is skipped when its start offset lies outside orig
// (possible when tokens were rewritten, e.g. by a regexp replacement) or when
// malformed UTF-8 is met while sizing its fragment. A well-formed U+FFFD in
// the text is an ordinary character and does not cause a skip.
func (s *Fragmenter) Fragment(orig []byte, ot highlight.TermLocations) []*highlight.Fragment {
	var rv []*highlight.Fragment

	maxbegin := 0
OUTER:
	for currTermIndex, termLocation := range ot {
		// start with this
		// it should be the highest scoring fragment with this term first
		start := termLocation.Start
		if start < 0 || start > len(orig) {
			// bail if out of bounds, possibly due to token replacement
			// e.g with a regexp replacement; checked up front so no later
			// slice expression can panic regardless of the fragment size
			continue OUTER
		}
		end := start
		used := 0
		for end < len(orig) && used < s.fragmentSize {
			r, size := utf8.DecodeRune(orig[end:])
			if isInvalidRune(r, size) {
				continue OUTER // bail
			}
			end += size
			used++
		}

		// if we still have more characters available to us
		// push back towards beginning
		// without cross maxbegin
		for start > 0 && used < s.fragmentSize {
			r, size := utf8.DecodeLastRune(orig[:start])
			if isInvalidRune(r, size) {
				continue OUTER // bail
			}
			if start-size < maxbegin {
				break
			}
			start -= size
			used++
		}

		// however, we'd rather have the tokens centered more in the frag
		// lets try to do that as best we can, without affecting the score
		// find the end of the last term in this fragment
		minend := end
		for _, innerTermLocation := range ot[currTermIndex:] {
			if innerTermLocation.End > end {
				break
			}
			minend = innerTermLocation.End
		}

		// find the smaller of the two rooms to move
		roomToMove := utf8.RuneCount(orig[minend:end])
		roomToMoveStart := 0
		if start >= maxbegin {
			roomToMoveStart = utf8.RuneCount(orig[maxbegin:start])
		}
		if roomToMoveStart < roomToMove {
			roomToMove = roomToMoveStart
		}

		// slide both edges backwards one rune at a time
		for offset := roomToMove / 2; offset > 0; offset-- {
			r, size := utf8.DecodeLastRune(orig[:start])
			if isInvalidRune(r, size) {
				continue OUTER // bail
			}
			start -= size

			r, size = utf8.DecodeLastRune(orig[:end])
			if isInvalidRune(r, size) {
				continue OUTER // bail
			}
			end -= size
		}

		rv = append(rv, &highlight.Fragment{Orig: orig, Start: start, End: end})
		// set maxbegin to the end of the current term location
		// so that next one won't back up to include it
		maxbegin = termLocation.End
	}

	if len(ot) == 0 {
		// if there were no terms to highlight
		// produce a single fragment from the beginning
		end := 0
		used := 0
		for end < len(orig) && used < s.fragmentSize {
			r, size := utf8.DecodeRune(orig[end:])
			if isInvalidRune(r, size) {
				// stop at malformed input and keep what was collected
				break
			}
			end += size
			used++
		}
		rv = append(rv, &highlight.Fragment{Orig: orig, Start: 0, End: end})
	}

	return rv
}
// Constructor builds a simple fragmenter from config. A float64 "size" entry
// is truncated to int and used as the fragment size; when it is missing or of
// another type, defaultFragmentSize applies. The cache argument is unused.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Fragmenter, error) {
	if sizeVal, ok := config["size"].(float64); ok {
		return NewFragmenter(int(sizeVal)), nil
	}
	return NewFragmenter(defaultFragmentSize), nil
}
// init registers Constructor under Name and panics if registration fails.
func init() {
	if err := registry.RegisterFragmenter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/fragmenter/simple/simple_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// TestSimpleFragmenter runs the fragmenter over a table of inputs (ASCII,
// text exactly and just over the fragment size, Devanagari and CJK) and
// compares the produced fragments with the expected byte offsets.
func TestSimpleFragmenter(t *testing.T) {
	type testCase struct {
		orig      []byte
		fragments []*highlight.Fragment
		ot        highlight.TermLocations
		size      int
	}

	// shared input texts
	short := []byte("this is a test")
	digits100 := []byte("0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789")
	digits101 := []byte("01234567890123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890")
	hindi := []byte("[[पानी का स्वाद]] [[नीलेश रघुवंशी]] का कविता संग्रह हैं। इस कृति के लिए उन्हें २००४ में [[केदार सम्मान]] से सम्मानित किया गया है।{{केदार सम्मान से सम्मानित कृतियाँ}}")
	cjk := []byte("交换机")

	// ten adjacent ten-byte terms covering the first 100 bytes of digits101
	var tenTerms highlight.TermLocations
	for i := 0; i < 10; i++ {
		tenTerms = append(tenTerms, &highlight.TermLocation{
			Term:  "0123456789",
			Pos:   i + 1,
			Start: 10 * i,
			End:   10*i + 10,
		})
	}
	// the first fragment covers bytes 0-100, each later one starts at its
	// own term and runs to the end of the text
	tenFragments := []*highlight.Fragment{
		{Orig: digits101, Start: 0, End: 100},
	}
	for i := 1; i < 10; i++ {
		tenFragments = append(tenFragments, &highlight.Fragment{
			Orig:  digits101,
			Start: 10 * i,
			End:   101,
		})
	}

	tests := []testCase{
		{
			orig: short,
			fragments: []*highlight.Fragment{
				{Orig: short, Start: 0, End: 14},
			},
			ot: highlight.TermLocations{
				&highlight.TermLocation{Term: "test", Pos: 4, Start: 10, End: 14},
			},
			size: 100,
		},
		{
			orig: digits100,
			fragments: []*highlight.Fragment{
				{Orig: digits100, Start: 0, End: 100},
			},
			ot: highlight.TermLocations{
				&highlight.TermLocation{Term: string(digits100), Pos: 1, Start: 0, End: 100},
			},
			size: 100,
		},
		{
			orig:      digits101,
			fragments: tenFragments,
			ot:        tenTerms,
			size:      100,
		},
		{
			orig: hindi,
			fragments: []*highlight.Fragment{
				{Orig: hindi, Start: 0, End: 411},
			},
			ot: highlight.TermLocations{
				&highlight.TermLocation{Term: "पानी", Pos: 1, Start: 2, End: 14},
			},
			size: 200,
		},
		{
			orig: cjk,
			fragments: []*highlight.Fragment{
				{Orig: cjk, Start: 0, End: 9},
				{Orig: cjk, Start: 3, End: 9},
			},
			ot: highlight.TermLocations{
				&highlight.TermLocation{Term: "交换", Pos: 1, Start: 0, End: 6},
				&highlight.TermLocation{Term: "换机", Pos: 2, Start: 3, End: 9},
			},
			size: 200,
		},
	}

	for _, tc := range tests {
		got := NewFragmenter(tc.size).Fragment(tc.orig, tc.ot)
		if reflect.DeepEqual(got, tc.fragments) {
			continue
		}
		t.Errorf("expected %#v, got %#v", tc.fragments, got)
		for _, fragment := range got {
			t.Logf("frag: %s", fragment.Orig[fragment.Start:fragment.End])
			t.Logf("frag: %d - %d", fragment.Start, fragment.End)
		}
	}
}
// TestSimpleFragmenterWithSize runs a fragmenter limited to five runes over
// an ASCII text with two terms and over a multi-byte text with no terms.
func TestSimpleFragmenterWithSize(t *testing.T) {
	ascii := []byte("this is a test")
	multibyte := []byte("避免出现 rune 越界问题")

	tests := []struct {
		orig      []byte
		fragments []*highlight.Fragment
		ot        highlight.TermLocations
	}{
		{
			orig: ascii,
			fragments: []*highlight.Fragment{
				{Orig: ascii, Start: 0, End: 5},
				{Orig: ascii, Start: 9, End: 14},
			},
			ot: highlight.TermLocations{
				&highlight.TermLocation{Term: "this", Pos: 1, Start: 0, End: 5},
				&highlight.TermLocation{Term: "test", Pos: 4, Start: 10, End: 14},
			},
		},
		{
			// no term locations: a single fragment from the beginning
			orig: multibyte,
			fragments: []*highlight.Fragment{
				{Orig: multibyte, Start: 0, End: 13},
			},
			ot: nil,
		},
	}

	fragmenter := NewFragmenter(5)
	for _, tc := range tests {
		got := fragmenter.Fragment(tc.orig, tc.ot)
		if reflect.DeepEqual(got, tc.fragments) {
			continue
		}
		t.Errorf("expected %#v, got %#v", tc.fragments, got)
		for _, fragment := range got {
			t.Logf("frag: %#v", fragment)
		}
	}
}
================================================
FILE: search/highlight/highlighter/ansi/ansi.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package ansi
import (
"fmt"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
ansiFormatter "github.com/blevesearch/bleve/v2/search/highlight/format/ansi"
simpleFragmenter "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
simpleHighlighter "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
)
// Name is the identifier under which this highlighter is registered with the
// registry.
const Name = "ansi"

// Constructor builds a simple highlighter that combines the simple fragmenter
// with the ANSI fragment formatter, both looked up by name in cache, and the
// default separator. The config argument is unused.
//
// Lookup failures are returned wrapped with %w, so callers can inspect the
// underlying cause with errors.Is / errors.As; the message text is the same
// as with %v.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	fragmenter, err := cache.FragmenterNamed(simpleFragmenter.Name)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %w", err)
	}

	formatter, err := cache.FragmentFormatterNamed(ansiFormatter.Name)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %w", err)
	}

	return simpleHighlighter.NewHighlighter(
			fragmenter,
			formatter,
			simpleHighlighter.DefaultSeparator),
		nil
}
// init registers Constructor under Name and panics if registration fails.
func init() {
	if err := registry.RegisterHighlighter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/highlighter/html/html.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package html
import (
"fmt"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search/highlight"
htmlFormatter "github.com/blevesearch/bleve/v2/search/highlight/format/html"
simpleFragmenter "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
simpleHighlighter "github.com/blevesearch/bleve/v2/search/highlight/highlighter/simple"
)
// Name is the identifier under which this highlighter is registered with the
// registry.
const Name = "html"

// Constructor builds a simple highlighter that combines the simple fragmenter
// with the HTML fragment formatter, both looked up by name in cache, and the
// default separator. The config argument is unused.
//
// Lookup failures are returned wrapped with %w, so callers can inspect the
// underlying cause with errors.Is / errors.As; the message text is the same
// as with %v.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	fragmenter, err := cache.FragmenterNamed(simpleFragmenter.Name)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %w", err)
	}

	formatter, err := cache.FragmentFormatterNamed(htmlFormatter.Name)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %w", err)
	}

	return simpleHighlighter.NewHighlighter(
			fragmenter,
			formatter,
			simpleHighlighter.DefaultSeparator),
		nil
}
// init registers Constructor under Name and panics if registration fails.
func init() {
	if err := registry.RegisterHighlighter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/highlighter/simple/fragment_scorer_simple.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// FragmentScorer rates a fragment by the number of distinct terms that occur
// inside it. Boost values from the original query play no part in the score.
type FragmentScorer struct {
	tlm search.TermLocationMap
}

// NewFragmentScorer returns a FragmentScorer that scores fragments against
// the term locations in tlm.
func NewFragmentScorer(tlm search.TermLocationMap) *FragmentScorer {
	scorer := new(FragmentScorer)
	scorer.tlm = tlm
	return scorer
}
// Score sets f.Score to the number of terms in the scorer's location map that
// have at least one location lying entirely within [f.Start, f.End] and
// sharing the fragment's array positions. Each term counts once, however many
// of its locations fall inside the fragment.
func (s *FragmentScorer) Score(f *highlight.Fragment) {
	matched := 0
	for _, locations := range s.tlm {
		for _, loc := range locations {
			if !loc.ArrayPositions.Equals(f.ArrayPositions) {
				continue
			}
			if int(loc.Start) < f.Start || int(loc.End) > f.End {
				continue
			}
			// one hit is enough for this term; move on to the next term
			matched++
			break
		}
	}
	f.Score = float64(matched)
}
================================================
FILE: search/highlight/highlighter/simple/fragment_scorer_simple_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"testing"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
// TestSimpleFragmentScorer checks that a fragment's score equals the number
// of distinct terms located inside it.
func TestSimpleFragmentScorer(t *testing.T) {
	// every case gets its own fragment because Score writes to it
	newFragment := func() *highlight.Fragment {
		return &highlight.Fragment{
			Orig:  []byte("cat in the hat"),
			Start: 0,
			End:   14,
		}
	}

	tests := []struct {
		fragment *highlight.Fragment
		tlm      search.TermLocationMap
		score    float64
	}{
		{
			fragment: newFragment(),
			tlm: search.TermLocationMap{
				"cat": []*search.Location{
					{Pos: 0, Start: 0, End: 3},
				},
			},
			score: 1,
		},
		{
			fragment: newFragment(),
			tlm: search.TermLocationMap{
				"cat": []*search.Location{
					{Pos: 1, Start: 0, End: 3},
				},
				"hat": []*search.Location{
					{Pos: 4, Start: 11, End: 14},
				},
			},
			score: 2,
		},
	}

	for _, tc := range tests {
		NewFragmentScorer(tc.tlm).Score(tc.fragment)
		if got := tc.fragment.Score; got != tc.score {
			t.Errorf("expected score %f, got %f", tc.score, got)
		}
	}
}
================================================
FILE: search/highlight/highlighter/simple/highlighter_simple.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"container/heap"
"fmt"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight"
)
const (
	// Name is the identifier under which this highlighter is registered with
	// the registry.
	Name = "simple"

	// DefaultSeparator is the separator used when the configuration does not
	// supply one.
	DefaultSeparator = "…"
)

// Highlighter combines a fragmenter, a fragment formatter and a separator
// string to produce formatted fragments for a document field.
type Highlighter struct {
	fragmenter highlight.Fragmenter
	formatter  highlight.FragmentFormatter
	sep        string
}

// NewHighlighter returns a Highlighter built from the given fragmenter,
// formatter and separator.
func NewHighlighter(fragmenter highlight.Fragmenter, formatter highlight.FragmentFormatter, separator string) *Highlighter {
	h := new(Highlighter)
	h.fragmenter = fragmenter
	h.formatter = formatter
	h.sep = separator
	return h
}

// Fragmenter returns the fragmenter currently in use.
func (s *Highlighter) Fragmenter() highlight.Fragmenter {
	return s.fragmenter
}

// SetFragmenter replaces the fragmenter.
func (s *Highlighter) SetFragmenter(f highlight.Fragmenter) {
	s.fragmenter = f
}

// FragmentFormatter returns the fragment formatter currently in use.
func (s *Highlighter) FragmentFormatter() highlight.FragmentFormatter {
	return s.formatter
}

// SetFragmentFormatter replaces the fragment formatter.
func (s *Highlighter) SetFragmentFormatter(f highlight.FragmentFormatter) {
	s.formatter = f
}

// Separator returns the separator string currently in use.
func (s *Highlighter) Separator() string {
	return s.sep
}

// SetSeparator replaces the separator string.
func (s *Highlighter) SetSeparator(sep string) {
	s.sep = sep
}
// BestFragmentInField returns the first fragment reported by
// BestFragmentsInField when asked for one fragment, or the empty string when
// none is produced.
func (s *Highlighter) BestFragmentInField(dm *search.DocumentMatch, doc index.Document, field string) string {
	if best := s.BestFragmentsInField(dm, doc, field, 1); len(best) > 0 {
		return best[0]
	}
	return ""
}
// BestFragmentsInField returns up to num formatted fragments for the named
// field of doc, chosen by score among the fragments the fragmenter produces.
//
// Every text field of doc whose name equals field is fragmented using the
// term locations that share that field's array positions. The fragments are
// scored and taken in priority order; a fragment overlapping one already
// chosen is dropped. A chosen fragment is formatted, prefixed with the
// separator when it does not start at offset 0 and suffixed with it when it
// does not end at the end of the field value. When at least one fragment
// results, the list is also added to dm via AddFragments.
func (s *Highlighter) BestFragmentsInField(dm *search.DocumentMatch, doc index.Document, field string, num int) []string {
	tlm := dm.Locations[field]
	orderedTermLocations := highlight.OrderTermLocations(tlm)
	scorer := NewFragmentScorer(tlm)

	// score the fragments and put them into a priority queue ordered by score
	fq := FragmentQueue{}
	heap.Init(&fq)
	doc.VisitFields(func(f index.Field) {
		if f.Name() != field {
			return
		}
		if _, isText := f.(index.TextField); !isText {
			return
		}
		// keep only the locations belonging to this array element
		sameArrayPosition := highlight.TermLocations{}
		for _, otl := range orderedTermLocations {
			if otl.ArrayPositions.Equals(f.ArrayPositions()) {
				sameArrayPosition = append(sameArrayPosition, otl)
			}
		}
		for _, fragment := range s.fragmenter.Fragment(f.Value(), sameArrayPosition) {
			fragment.ArrayPositions = f.ArrayPositions()
			scorer.Score(fragment)
			heap.Push(&fq, fragment)
		}
	})

	// now find the N best non-overlapping fragments
	var bestFragments []*highlight.Fragment
	for len(fq) > 0 && len(bestFragments) < num {
		candidate := heap.Pop(&fq).(*highlight.Fragment)
		overlapsChosen := false
		for _, chosen := range bestFragments {
			if candidate.Overlaps(chosen) {
				overlapsChosen = true
				break
			}
		}
		if !overlapsChosen {
			bestFragments = append(bestFragments, candidate)
		}
	}

	// now that we have the best fragments, we can format them
	orderedTermLocations.MergeOverlapping()
	formattedFragments := make([]string, len(bestFragments))
	for i, fragment := range bestFragments {
		var prefix, suffix string
		if fragment.Start != 0 {
			prefix = s.sep
		}
		body := s.formatter.Format(fragment, orderedTermLocations)
		if fragment.End != len(fragment.Orig) {
			suffix = s.sep
		}
		formattedFragments[i] = prefix + body + suffix
	}

	if len(formattedFragments) > 0 {
		dm.AddFragments(field, formattedFragments)
	}

	return formattedFragments
}
// FragmentQueue implements heap.Interface and holds fragments ordered so that
// the highest-scoring fragment is popped first.
type FragmentQueue []*highlight.Fragment

// Len returns the number of fragments in the queue.
func (fq FragmentQueue) Len() int { return len(fq) }

// Less reports whether the fragment at i must be popped before the one at j.
func (fq FragmentQueue) Less(i, j int) bool {
	// We want Pop to give us the highest, not lowest, priority so we use greater-than here.
	return fq[i].Score > fq[j].Score
}

// Swap exchanges the fragments at i and j and updates their Index fields to
// the new positions.
func (fq FragmentQueue) Swap(i, j int) {
	fq[i], fq[j] = fq[j], fq[i]
	fq[i].Index = i
	fq[j].Index = j
}

// Push appends x, which must be a *highlight.Fragment, and records its
// position in its Index field. It is meant to be called through heap.Push.
func (fq *FragmentQueue) Push(x interface{}) {
	n := len(*fq)
	item := x.(*highlight.Fragment)
	item.Index = n
	*fq = append(*fq, item)
}

// Pop removes and returns the last fragment of the underlying slice. It is
// meant to be called through heap.Pop, which moves the best fragment there
// first.
func (fq *FragmentQueue) Pop() interface{} {
	old := *fq
	n := len(old)
	item := old[n-1]
	old[n-1] = nil  // clear the slot so the backing array does not keep the fragment reachable
	item.Index = -1 // for safety
	*fq = old[0 : n-1]
	return item
}
// Constructor builds a simple highlighter from config.
//
// Recognised entries:
//   - "separator" (string, optional): overrides DefaultSeparator.
//   - "fragmenter" (string, required): name of the fragmenter to look up in
//     cache.
//   - "formatter" (string, required): name of the fragment formatter to look
//     up in cache.
//
// It returns an error when a required entry is missing or not a string, or
// when a lookup fails. Lookup failures are wrapped with %w, so callers can
// inspect the underlying cause with errors.Is / errors.As; the message text
// is the same as with %v.
func Constructor(config map[string]interface{}, cache *registry.Cache) (highlight.Highlighter, error) {
	separator := DefaultSeparator
	if separatorVal, ok := config["separator"].(string); ok {
		separator = separatorVal
	}

	fragmenterName, ok := config["fragmenter"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify fragmenter")
	}
	fragmenter, err := cache.FragmenterNamed(fragmenterName)
	if err != nil {
		return nil, fmt.Errorf("error building fragmenter: %w", err)
	}

	formatterName, ok := config["formatter"].(string)
	if !ok {
		return nil, fmt.Errorf("must specify formatter")
	}
	formatter, err := cache.FragmentFormatterNamed(formatterName)
	if err != nil {
		return nil, fmt.Errorf("error building fragment formatter: %w", err)
	}

	return NewHighlighter(fragmenter, formatter, separator), nil
}
// init registers Constructor under Name and panics if registration fails.
func init() {
	if err := registry.RegisterHighlighter(Name, Constructor); err != nil {
		panic(err)
	}
}
================================================
FILE: search/highlight/highlighter/simple/highlighter_simple_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package simple
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight/format/ansi"
sfrag "github.com/blevesearch/bleve/v2/search/highlight/fragmenter/simple"
)
const (
reset = "\x1b[0m"
DefaultAnsiHighlight = "\x1b[43m"
)
func TestSimpleHighlighter(t *testing.T) {
fragmenter := sfrag.NewFragmenter(100)
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
highlighter := NewHighlighter(fragmenter, formatter, DefaultSeparator)
docMatch := search.DocumentMatch{
ID: "a",
Score: 1.0,
Locations: search.FieldTermLocationMap{
"desc": search.TermLocationMap{
"quick": []*search.Location{
{
Pos: 2,
Start: 4,
End: 9,
},
},
"fox": []*search.Location{
{
Pos: 4,
Start: 16,
End: 19,
},
},
},
},
}
expectedFragment := "the " + DefaultAnsiHighlight + "quick" + reset + " brown " + DefaultAnsiHighlight + "fox" + reset + " jumps over the lazy dog"
doc := document.NewDocument("a").AddField(document.NewTextField("desc", []uint64{}, []byte("the quick brown fox jumps over the lazy dog")))
fragment := highlighter.BestFragmentInField(&docMatch, doc, "desc")
if fragment != expectedFragment {
t.Errorf("expected `%s`, got `%s`", expectedFragment, fragment)
}
}
// TestSimpleHighlighterLonger asks the highlighter for the five best
// fragments of a multi-paragraph field and compares them, in order, with the
// expected highlighted fragments.
func TestSimpleHighlighterLonger(t *testing.T) {
// NOTE(review): the Start/End values further down are byte offsets into this
// literal, so they depend on its exact bytes (including the line breaks
// between paragraphs) — do not reflow or re-wrap it.
fieldBytes := []byte(`Lorem ipsum dolor sit amet, consectetur adipiscing elit. Mauris sed semper nulla, sed pellentesque urna. Suspendisse potenti. Aliquam dignissim pulvinar erat vel ullamcorper. Nullam sed diam at dolor dapibus varius. Vestibulum at semper nunc. Integer ullamcorper enim ut nisi condimentum lacinia. Nulla ipsum ipsum, dictum in dapibus non, bibendum eget neque. Vestibulum malesuada erat quis malesuada dictum. Mauris luctus viverra lorem, nec hendrerit lacus lacinia ut. Donec suscipit sit amet nisi et dictum. Maecenas ultrices mollis diam, vel commodo libero lobortis nec. Nunc non dignissim dolor. Nulla non tempus risus, eget porttitor lectus. Suspendisse vitae gravida magna, a sagittis urna. Curabitur nec dui volutpat, hendrerit nisi non, adipiscing erat. Maecenas aliquet sem sit amet nibh ultrices accumsan.
Mauris lobortis sem sed blandit bibendum. In scelerisque eros sed metus aliquet convallis ac eget metus. Donec eget feugiat sem. Quisque venenatis, augue et blandit vulputate, velit odio viverra dolor, eu iaculis eros urna ut nunc. Duis faucibus mattis enim ut ultricies. Donec scelerisque volutpat elit, vel varius ante porttitor vel. Duis neque nulla, ultrices vel est id, molestie semper odio. Maecenas condimentum felis vitae nibh venenatis, ut feugiat risus vehicula. Suspendisse non sapien neque. Etiam et lorem consequat lorem aliquam ullamcorper. Pellentesque id vestibulum neque, at aliquam turpis. Aenean ultrices nec erat sit amet aliquam. Morbi eu sem in augue cursus ullamcorper a sed dolor. Integer et lobortis nulla, sit amet laoreet elit. In elementum, nibh nec volutpat pretium, lectus est pulvinar arcu, vehicula lobortis tellus sem id mauris. Maecenas ac blandit purus, sit amet scelerisque magna.
In hac habitasse platea dictumst. In lacinia elit non risus venenatis viverra. Nulla vestibulum laoreet turpis ac accumsan. Vivamus eros felis, rhoncus vel interdum bibendum, imperdiet nec diam. Etiam sed eros sed orci pellentesque sagittis. Praesent a fermentum leo. Vivamus ipsum risus, faucibus a dignissim ut, ullamcorper nec risus. Etiam quis adipiscing velit. Nam ac cursus arcu. Sed bibendum lectus quis massa dapibus dapibus. Vestibulum fermentum eros vitae hendrerit condimentum.
Fusce viverra eleifend iaculis. Maecenas tempor dictum cursus. Mauris faucibus, tortor in bibendum ornare, nibh lorem sollicitudin est, sed consectetur nulla dui imperdiet urna. Fusce aliquet odio fermentum massa mollis, id feugiat lacus egestas. Integer et eleifend metus. Duis neque tellus, vulputate nec dui eu, euismod sodales orci. Vivamus turpis erat, consectetur et pulvinar nec, ornare a quam. Maecenas fermentum, ligula vitae consectetur lobortis, mi lacus fermentum ante, ut semper lacus lectus porta orci. Nulla vehicula sodales eros, in iaculis ante laoreet at. Sed venenatis interdum metus, egestas scelerisque orci laoreet ut. Donec fermentum enim eget nibh blandit laoreet. Proin lacinia adipiscing lorem vel ornare. Donec ullamcorper massa elementum urna varius viverra. Proin pharetra, erat at feugiat rhoncus, velit eros condimentum mi, ac mattis sapien dolor non elit. Aenean viverra purus id tincidunt vulputate.
Etiam vel augue vel nisl commodo suscipit et ac nisl. Quisque eros diam, porttitor et aliquet sed, vulputate in odio. Aenean feugiat est quis neque vehicula, eget vulputate nunc tempor. Donec quis nulla ut quam feugiat consectetur ut et justo. Nulla congue, metus auctor facilisis scelerisque, nunc risus vulputate urna, in blandit urna nibh et neque. Etiam quis tortor ut nulla dignissim dictum non sed ligula. Vivamus accumsan ligula eget ipsum ultrices, a tincidunt urna blandit. In hac habitasse platea dictumst.`)
doc := document.NewDocument("a").AddField(document.NewTextField("full", []uint64{}, fieldBytes))
// match locations for three terms within the "full" field; Pos is left at
// zero for every location in this test
docMatch := search.DocumentMatch{
ID: "a",
Score: 1.0,
Locations: search.FieldTermLocationMap{
"full": search.TermLocationMap{
"metus": []*search.Location{
{
Pos: 0,
Start: 883,
End: 888,
},
{
Pos: 0,
Start: 915,
End: 920,
},
{
Pos: 0,
Start: 2492,
End: 2497,
},
{
Pos: 0,
Start: 2822,
End: 2827,
},
{
Pos: 0,
Start: 3417,
End: 3422,
},
},
"interdum": []*search.Location{
{
Pos: 0,
Start: 1891,
End: 1899,
},
{
Pos: 0,
Start: 2813,
End: 2821,
},
},
"venenatis": []*search.Location{
{
Pos: 0,
Start: 954,
End: 963,
},
{
Pos: 0,
Start: 1252,
End: 1261,
},
{
Pos: 0,
Start: 1795,
End: 1804,
},
{
Pos: 0,
Start: 2803,
End: 2812,
},
},
},
},
}
// expected fragments, in the order the highlighter is expected to return
// them; the first one contains all three terms
expectedFragments := []string{
"…eros, in iaculis ante laoreet at. Sed " + DefaultAnsiHighlight + "venenatis" + reset + " " + DefaultAnsiHighlight + "interdum" + reset + " " + DefaultAnsiHighlight + "metus" + reset + ", egestas scelerisque orci laoreet ut.…",
"… eros sed " + DefaultAnsiHighlight + "metus" + reset + " aliquet convallis ac eget " + DefaultAnsiHighlight + "metus" + reset + ". Donec eget feugiat sem. Quisque " + DefaultAnsiHighlight + "venenatis" + reset + ", augue et…",
"… odio. Maecenas condimentum felis vitae nibh " + DefaultAnsiHighlight + "venenatis" + reset + ", ut feugiat risus vehicula. Suspendisse non s…",
"… id feugiat lacus egestas. Integer et eleifend " + DefaultAnsiHighlight + "metus" + reset + ". Duis neque tellus, vulputate nec dui eu, euism…",
"… accumsan. Vivamus eros felis, rhoncus vel " + DefaultAnsiHighlight + "interdum" + reset + " bibendum, imperdiet nec diam. Etiam sed eros sed…",
}
fragmenter := sfrag.NewFragmenter(100)
formatter := ansi.NewFragmentFormatter(ansi.DefaultAnsiHighlight)
highlighter := NewHighlighter(fragmenter, formatter, DefaultSeparator)
// request at most five fragments for the "full" field
fragments := highlighter.BestFragmentsInField(&docMatch, doc, "full", 5)
if !reflect.DeepEqual(fragments, expectedFragments) {
t.Errorf("expected %#v, got %#v", expectedFragments, fragments)
}
}
================================================
FILE: search/highlight/highlighter.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
import (
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// Fragment describes a candidate highlight fragment: a Start/End range
// together with a score.
//
// NOTE(review): Orig presumably holds the original field bytes that Start and
// End index into, and ArrayPositions the array element the value came from —
// confirm against the fragmenter implementations.
type Fragment struct {
	Orig           []byte
	ArrayPositions []uint64
	Start          int
	End            int
	Score          float64
	Index          int // used by heap
}

// Overlaps reports whether f and other share at least one position, treating
// each fragment as the half-open range [Start, End). Two fragments that merely
// touch (one ends where the other starts) do not overlap.
func (f *Fragment) Overlaps(other *Fragment) bool {
	otherStartsInF := f.Start <= other.Start && other.Start < f.End
	fStartsInOther := other.Start <= f.Start && f.Start < other.End
	return otherStartsInF || fStartsInOther
}
// Fragmenter splits a byte slice into candidate Fragments, given a set of
// TermLocations.
//
// NOTE(review): the byte slice is presumably the original field value and the
// TermLocations the ordered match locations within it — confirm against the
// implementations.
type Fragmenter interface {
Fragment([]byte, TermLocations) []*Fragment
}
// FragmentFormatter renders a Fragment as a string, using the supplied
// ordered term locations.
type FragmentFormatter interface {
Format(f *Fragment, orderedTermLocations TermLocations) string
}
// FragmentScorer computes a score for a Fragment.
type FragmentScorer interface {
Score(f *Fragment) float64
}
// Highlighter produces highlighted fragments of a document field for a
// search hit. It exposes getters and setters for the Fragmenter,
// FragmentFormatter and separator string it works with.
type Highlighter interface {
Fragmenter() Fragmenter
SetFragmenter(Fragmenter)
FragmentFormatter() FragmentFormatter
SetFragmentFormatter(FragmentFormatter)
Separator() string
SetSeparator(string)
// BestFragmentInField returns a single formatted fragment for the given
// document match, document and field name (the string argument).
BestFragmentInField(*search.DocumentMatch, index.Document, string) string
// BestFragmentsInField returns formatted fragments for the given document
// match, document and field name; the int argument is the number of
// fragments requested.
BestFragmentsInField(*search.DocumentMatch, index.Document, string, int) []string
}
================================================
FILE: search/highlight/term_locations.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
import (
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
)
// TermLocation records one occurrence of a term inside a field value.
// OrderTermLocations fills it from a search.Location, converting Pos, Start
// and End to int.
type TermLocation struct {
// Term is the matched term (the key of the search.TermLocationMap entry).
Term string
// ArrayPositions is copied from the search.Location; two locations are only
// considered overlapping when their ArrayPositions are deeply equal.
ArrayPositions search.ArrayPositions
Pos int
// Start and End delimit the occurrence; Overlaps treats the range as
// half-open, [Start, End).
Start int
End int
}
// Overlaps reports whether tl and other cover at least one common position.
// Locations with different ArrayPositions (compared with reflect.DeepEqual)
// never overlap; otherwise each location is treated as the half-open range
// [Start, End), so locations that merely touch do not overlap.
func (tl *TermLocation) Overlaps(other *TermLocation) bool {
	if !reflect.DeepEqual(tl.ArrayPositions, other.ArrayPositions) {
		return false
	}
	if tl.Start <= other.Start && other.Start < tl.End {
		return true
	}
	return other.Start <= tl.Start && tl.Start < other.End
}
// TermLocations is a list of term locations. Together with Less it satisfies
// sort.Interface.
type TermLocations []*TermLocation

// Len returns the number of entries in the list.
func (t TermLocations) Len() int {
	return len(t)
}

// Swap exchanges the entries at indexes i and j.
func (t TermLocations) Swap(i, j int) {
	t[j], t[i] = t[i], t[j]
}
// Less orders term locations first by ArrayPositions and then by Start.
//
// ArrayPositions are compared element by element over their common prefix;
// when the prefix is identical the shorter list sorts first. Only when the
// array positions are equal does the Start offset decide.
func (t TermLocations) Less(i, j int) bool {
	left, right := t[i], t[j]
	lap, rap := left.ArrayPositions, right.ArrayPositions

	// first differing element of the common prefix decides
	for k := 0; k < len(lap) && k < len(rap); k++ {
		if lap[k] != rap[k] {
			return lap[k] < rap[k]
		}
	}

	// common prefix identical: the shorter list is the smaller one
	if len(lap) != len(rap) {
		return len(lap) < len(rap)
	}

	// identical array positions: fall back to the start offset
	return left.Start < right.Start
}
// MergeOverlapping collapses runs of overlapping term locations in place.
//
// The receiver is expected to be sorted (see Less / OrderTermLocations). Each
// entry that overlaps the most recent surviving entry is merged into it: the
// survivor's End is extended to cover the merged entry and the merged entry's
// slot is set to nil. The slice keeps its length; callers must skip nil
// entries. Entries that are already nil are ignored.
func (t TermLocations) MergeOverlapping() {
	var lastTl *TermLocation
	for i, tl := range t {
		if tl == nil {
			continue
		}
		if lastTl != nil && lastTl.Overlaps(tl) {
			// merge this with the previous survivor; never shrink the
			// survivor when tl is fully contained in it
			if tl.End > lastTl.End {
				lastTl.End = tl.End
			}
			t[i] = nil
			continue
		}
		// no overlap: tl starts a new run and becomes the merge target for
		// the entries that follow
		lastTl = tl
	}
}
// OrderTermLocations flattens a search.TermLocationMap into a single
// TermLocations list, one entry per location of every term, and sorts it
// with sort.Sort (see Less for the ordering). The result is never nil, even
// for an empty map.
func OrderTermLocations(tlm search.TermLocationMap) TermLocations {
	ordered := make(TermLocations, 0)
	for term, locs := range tlm {
		for _, loc := range locs {
			ordered = append(ordered, &TermLocation{
				Term:           term,
				ArrayPositions: loc.ArrayPositions,
				Pos:            int(loc.Pos),
				Start:          int(loc.Start),
				End:            int(loc.End),
			})
		}
	}
	sort.Sort(ordered)
	return ordered
}
================================================
FILE: search/highlight/term_locations_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package highlight
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/search"
)
// TestTermLocationOverlaps exercises TermLocation.Overlaps for overlapping,
// touching and disjoint ranges, with and without array positions.
func TestTermLocationOverlaps(t *testing.T) {
	tests := []struct {
		left     *TermLocation
		right    *TermLocation
		expected bool
	}{
		{
			left:     &TermLocation{Start: 0, End: 5},
			right:    &TermLocation{Start: 3, End: 7},
			expected: true,
		},
		{
			left:     &TermLocation{Start: 0, End: 5},
			right:    &TermLocation{Start: 5, End: 7},
			expected: false,
		},
		{
			left:     &TermLocation{Start: 0, End: 5},
			right:    &TermLocation{Start: 7, End: 11},
			expected: false,
		},
		// with array positions
		{
			left:     &TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
			right:    &TermLocation{ArrayPositions: search.ArrayPositions{1}, Start: 7, End: 11},
			expected: false,
		},
		{
			left:     &TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
			right:    &TermLocation{ArrayPositions: search.ArrayPositions{1}, Start: 3, End: 11},
			expected: false,
		},
		{
			left:     &TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
			right:    &TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 3, End: 11},
			expected: true,
		},
		{
			left:     &TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
			right:    &TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 7, End: 11},
			expected: false,
		},
	}

	for _, test := range tests {
		if actual := test.left.Overlaps(test.right); actual != test.expected {
			t.Errorf("expected %t got %t for %#v", test.expected, actual, test)
		}
	}
}
// TestTermLocationsMergeOverlapping checks that MergeOverlapping extends the
// surviving location and nils out the merged ones, and that locations in
// different array positions are never merged.
func TestTermLocationsMergeOverlapping(t *testing.T) {
	tests := []struct {
		input  TermLocations
		output TermLocations
	}{
		{
			input:  TermLocations{},
			output: TermLocations{},
		},
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 7, End: 11},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 7, End: 11},
			},
		},
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 4, End: 11},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 11},
				nil,
			},
		},
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 4, End: 11},
				&TermLocation{Start: 9, End: 13},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 13},
				nil,
				nil,
			},
		},
		{
			input: TermLocations{
				&TermLocation{Start: 0, End: 5},
				&TermLocation{Start: 4, End: 11},
				&TermLocation{Start: 9, End: 13},
				&TermLocation{Start: 15, End: 21},
			},
			output: TermLocations{
				&TermLocation{Start: 0, End: 13},
				nil,
				nil,
				&TermLocation{Start: 15, End: 21},
			},
		},
		// with array positions
		{
			input: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{1}, Start: 7, End: 11},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{1}, Start: 7, End: 11},
			},
		},
		{
			input: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 7, End: 11},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 7, End: 11},
			},
		},
		{
			input: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 3, End: 11},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 11},
				nil,
			},
		},
		{
			input: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{1}, Start: 3, End: 11},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Start: 0, End: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{1}, Start: 3, End: 11},
			},
		},
	}

	for _, test := range tests {
		// MergeOverlapping mutates its receiver, so the input doubles as the
		// actual result
		test.input.MergeOverlapping()
		if !reflect.DeepEqual(test.input, test.output) {
			t.Errorf("expected: %#v got %#v", test.output, test.input)
		}
	}
}
// TestTermLocationsOrder checks that OrderTermLocations sorts by array
// positions first and by start offset second.
func TestTermLocationsOrder(t *testing.T) {
	tests := []struct {
		input  search.TermLocationMap
		output TermLocations
	}{
		{
			input:  search.TermLocationMap{},
			output: TermLocations{},
		},
		{
			input: search.TermLocationMap{
				"term": []*search.Location{
					{Start: 0},
					{Start: 5},
				},
			},
			output: TermLocations{
				&TermLocation{Term: "term", Start: 0},
				&TermLocation{Term: "term", Start: 5},
			},
		},
		{
			input: search.TermLocationMap{
				"term": []*search.Location{
					{Start: 5},
					{Start: 0},
				},
			},
			output: TermLocations{
				&TermLocation{Term: "term", Start: 0},
				&TermLocation{Term: "term", Start: 5},
			},
		},
		// with array positions
		{
			input: search.TermLocationMap{
				"term": []*search.Location{
					{ArrayPositions: search.ArrayPositions{0}, Start: 0},
					{ArrayPositions: search.ArrayPositions{0}, Start: 5},
				},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 0},
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5},
			},
		},
		{
			input: search.TermLocationMap{
				"term": []*search.Location{
					{ArrayPositions: search.ArrayPositions{0}, Start: 5},
					{ArrayPositions: search.ArrayPositions{0}, Start: 0},
				},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 0},
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5},
			},
		},
		{
			input: search.TermLocationMap{
				"term": []*search.Location{
					{ArrayPositions: search.ArrayPositions{0}, Start: 5},
					{ArrayPositions: search.ArrayPositions{1}, Start: 0},
				},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{1}, Term: "term", Start: 0},
			},
		},
		{
			input: search.TermLocationMap{
				"term": []*search.Location{
					{ArrayPositions: search.ArrayPositions{0}, Start: 5},
					{ArrayPositions: search.ArrayPositions{0, 1}, Start: 0},
				},
			},
			output: TermLocations{
				&TermLocation{ArrayPositions: search.ArrayPositions{0}, Term: "term", Start: 5},
				&TermLocation{ArrayPositions: search.ArrayPositions{0, 1}, Term: "term", Start: 0},
			},
		},
	}

	for _, test := range tests {
		if actual := OrderTermLocations(test.input); !reflect.DeepEqual(actual, test.output) {
			t.Errorf("expected: %#v got %#v", test.output, actual)
		}
	}
}
================================================
FILE: search/levenshtein.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"math"
)
// LevenshteinDistance returns the edit distance (insertions, deletions and
// substitutions, each costing 1) between a and b. The strings are compared
// byte by byte, not rune by rune.
//
// Only a single row of the dynamic-programming matrix is kept, so the
// function allocates len(a)+1 ints.
func LevenshteinDistance(a, b string) int {
	// row[j] is the distance between the first j bytes of a and the prefix of
	// b consumed so far; initially that prefix is empty
	row := make([]int, len(a)+1)
	for j := range row {
		row[j] = j
	}

	for i := 0; i < len(b); i++ {
		// diag is the cell up-and-left of the one being computed
		diag := row[0]
		row[0] = i + 1
		for j := 0; j < len(a); j++ {
			above := row[j+1]

			best := above + 1
			if left := row[j] + 1; left < best {
				best = left
			}

			subst := diag
			if a[j] != b[i] {
				subst++
			}
			if subst < best {
				best = subst
			}

			row[j+1] = best
			diag = above
		}
	}
	return row[len(a)]
}
// LevenshteinDistanceMax is LevenshteinDistance with an upper bound: it
// attempts to bail out early once the distance is known to be greater than
// max. In that case the first return value is max and the second is true,
// indicating that max was exceeded.
//
// It is a convenience wrapper around LevenshteinDistanceMaxReuseSlice that
// passes no scratch slice and discards the returned one.
func LevenshteinDistanceMax(a, b string, max int) (int, bool) {
	dist, exceeded, _ := LevenshteinDistanceMaxReuseSlice(a, b, max, nil)
	return dist, exceeded
}
// LevenshteinDistanceMaxReuseSlice computes the byte-wise Levenshtein
// distance between a and b, giving up as soon as the distance is known to be
// greater than max.
//
// Returns:
//   - the distance, or max when the distance is greater than max
//   - true when the distance is greater than max, false otherwise
//   - the scratch slice, to be passed back as d on a later call to avoid
//     reallocating; it is d itself unless d was too small to hold len(a)+1
//     ints, in which case a new slice was allocated
//
// The contents of d on entry are irrelevant; only its capacity matters.
func LevenshteinDistanceMaxReuseSlice(a, b string, max int, d []int) (int, bool, []int) {
	la := len(a)
	lb := len(b)

	// the length difference is a lower bound on the distance
	ld := int(math.Abs(float64(la - lb)))
	if ld > max {
		return max, true, d
	}
	if la == 0 || lb == 0 {
		// if one string of the two strings is empty, then ld is
		// the length of the other string and as such is <= max
		return ld, false, d
	}

	if cap(d) < la+1 {
		d = make([]int, la+1)
	}
	d = d[:la+1]

	// d[0] is (re)written at the top of every row below, so only the
	// remaining cells need initializing here
	for j := 1; j <= la; j++ {
		d[j] = j
	}

	for i := 1; i <= lb; i++ {
		d[0] = i
		lastdiag := i - 1
		rowmin := max + 1
		for j := 1; j <= la; j++ {
			olddiag := d[j]

			cell := d[j] + 1
			if left := d[j-1] + 1; left < cell {
				cell = left
			}

			cost := 1
			if a[j-1] == b[i-1] {
				cost = 0
			}
			if lastdiag+cost < cell {
				cell = lastdiag + cost
			}

			if cell < rowmin {
				rowmin = cell
			}
			d[j] = cell
			lastdiag = olddiag
		}
		// values never decrease from one row to the next along any path, so
		// once every cell of a row is greater than max the result must be too
		if rowmin > max {
			return max, true, d
		}
	}

	// the last row may contain a cell <= max while the final cell itself is
	// greater than max; report that as exceeded as well so the result is
	// consistent with the early-exit paths above
	if d[la] > max {
		return max, true, d
	}
	return d[la], false, d
}
================================================
FILE: search/levenshtein_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"testing"
)
// TestLevenshteinDistance checks LevenshteinDistance against known distances.
func TestLevenshteinDistance(t *testing.T) {
	cases := []struct {
		a, b string
		dist int
	}{
		{a: "water", b: "atec", dist: 2},
		{a: "water", b: "aphex", dist: 4},
	}

	for _, tc := range cases {
		if got := LevenshteinDistance(tc.a, tc.b); got != tc.dist {
			t.Errorf("expected %d, got %d for %s and %s", tc.dist, got, tc.a, tc.b)
		}
	}
}
// TestLevenshteinDistanceMax checks both return values of
// LevenshteinDistanceMax, including the cases where one input is empty.
func TestLevenshteinDistanceMax(t *testing.T) {
	cases := []struct {
		a, b     string
		max      int
		dist     int
		exceeded bool
	}{
		{a: "water", b: "atec", max: 1, dist: 1, exceeded: true},
		{a: "water", b: "christmas", max: 3, dist: 3, exceeded: true},
		{a: "", b: "water", max: 10, dist: 5, exceeded: false},
		{a: "water", b: "", max: 3, dist: 3, exceeded: true},
	}

	for _, tc := range cases {
		got, gotExceeded := LevenshteinDistanceMax(tc.a, tc.b, tc.max)
		if got == tc.dist && gotExceeded == tc.exceeded {
			continue
		}
		t.Errorf("expected %d %t, got %d %t for %s and %s", tc.dist, tc.exceeded, got, gotExceeded, tc.a, tc.b)
	}
}
// benchmarkTerms are the terms the benchmarks below compare against "water":
// the first 5 are within edit distance 2 of it,
// the last 5 are farther away than 2
var benchmarkTerms = []string{
"watex",
"aters",
"wayer",
"wbter",
"yater",
"christmas",
"waterwaterwater",
"watcatdogfish",
"q",
"couchbase",
}
// BenchmarkLevenshteinDistance measures the unbounded distance computation
// of "water" against every benchmark term.
func BenchmarkLevenshteinDistance(b *testing.B) {
	const base = "water"
	for n := 0; n < b.N; n++ {
		for idx := range benchmarkTerms {
			LevenshteinDistance(base, benchmarkTerms[idx])
		}
	}
}
// BenchmarkLevenshteinDistanceMax measures the bounded distance computation
// (max 2) of "water" against every benchmark term.
func BenchmarkLevenshteinDistanceMax(b *testing.B) {
	const base = "water"
	for n := 0; n < b.N; n++ {
		for idx := range benchmarkTerms {
			LevenshteinDistanceMax(base, benchmarkTerms[idx], 2)
		}
	}
}
================================================
FILE: search/pool.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"reflect"
)
// reflectStaticSizeDocumentMatchPool holds the size in bytes of a
// DocumentMatchPool value as reported by reflect. It is computed once, at
// package initialization.
var reflectStaticSizeDocumentMatchPool int

func init() {
	reflectStaticSizeDocumentMatchPool = int(reflect.TypeOf(DocumentMatchPool{}).Size())
}
// DocumentMatchPoolTooSmall is a callback function that is executed when
// the DocumentMatchPool has no available instance left to hand out.
// It receives the pool and must return the DocumentMatch to use.
// The default callback performs a just-in-time allocation, but a custom one
// could log a message, panic, etc.
type DocumentMatchPoolTooSmall func(p *DocumentMatchPool) *DocumentMatch
// DocumentMatchPool manages use/reuse of DocumentMatch instances.
// It pre-allocates space from a single large block with the expected
// number of instances. It is not thread-safe, as currently all
// aspects of search take place in a single goroutine.
type DocumentMatchPool struct {
// avail is the stack of instances currently available to Get.
avail DocumentMatchCollection
// TooSmall is invoked by Get when avail is empty.
TooSmall DocumentMatchPoolTooSmall
}
// defaultDocumentMatchPoolTooSmall is the TooSmall callback installed by
// NewDocumentMatchPool: it simply allocates a fresh, zero-valued
// DocumentMatch and ignores the pool.
func defaultDocumentMatchPoolTooSmall(p *DocumentMatchPool) *DocumentMatch {
	return new(DocumentMatch)
}
// NewDocumentMatchPool will build a DocumentMatchPool with memory
// pre-allocated to accommodate the requested number of DocumentMatch
// instances.
//
// size is the number of instances to pre-allocate and sortsize the number of
// Sort entries reserved for each of them. All instances come from one block
// and all Sort slices from one shared string array. Each Sort slice starts
// empty with a capacity of exactly sortsize, so appending more than sortsize
// entries reallocates that slice instead of overwriting the entries reserved
// for the next instance.
func NewDocumentMatchPool(size, sortsize int) *DocumentMatchPool {
	avail := make(DocumentMatchCollection, size)
	// pre-allocate the expected number of instances
	startBlock := make([]DocumentMatch, size)
	startSorts := make([]string, size*sortsize)
	// make these initial instances available
	for i := range startBlock {
		lo := i * sortsize
		hi := lo + sortsize
		avail[i] = &startBlock[i]
		// full slice expression: length 0, capacity limited to this
		// instance's own window of startSorts
		avail[i].Sort = startSorts[lo:lo:hi]
	}
	return &DocumentMatchPool{
		avail:    avail,
		TooSmall: defaultDocumentMatchPoolTooSmall,
	}
}
// Get returns an available DocumentMatch from the pool, taking the most
// recently added one. If no instance is available the TooSmall callback is
// invoked and its result returned; the pool itself only grows if that
// instance is later handed to Put.
func (p *DocumentMatchPool) Get() *DocumentMatch {
	n := len(p.avail)
	if n == 0 {
		return p.TooSmall(p)
	}
	dm := p.avail[n-1]
	p.avail = p.avail[:n-1]
	return dm
}
// Put returns a DocumentMatch to the pool, making it available to later Get
// calls. The instance is Reset first; a nil argument is ignored.
func (p *DocumentMatchPool) Put(d *DocumentMatch) {
	if d != nil {
		// reset DocumentMatch before returning it to available pool
		d.Reset()
		p.avail = append(p.avail, d)
	}
}
================================================
FILE: search/pool_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import "testing"
// TestDocumentMatchPool verifies that the pool hands out its pre-allocated
// instances without invoking TooSmall, invokes TooSmall once exhausted, and
// grows when an extra instance is returned to it.
func TestDocumentMatchPool(t *testing.T) {
	tooManyCalled := false
	// create a pool
	dmp := NewDocumentMatchPool(10, 0)
	dmp.TooSmall = func(inner *DocumentMatchPool) *DocumentMatch {
		tooManyCalled = true
		return &DocumentMatch{}
	}
	// get 10 instances without returning
	returned := make(DocumentMatchCollection, 10)
	for i := 0; i < 10; i++ {
		returned[i] = dmp.Get()
		if tooManyCalled {
			t.Fatal("too many function called before expected")
		}
	}
	// get one more and see if too many function is called
	extra := dmp.Get()
	if !tooManyCalled {
		t.Fatal("expected too many function to be called, but wasn't")
	}
	// return the first 10
	for i := 0; i < 10; i++ {
		dmp.Put(returned[i])
	}
	// check len and cap
	if len(dmp.avail) != 10 {
		t.Fatalf("expected 10 available, got %d", len(dmp.avail))
	}
	if cap(dmp.avail) != 10 {
		t.Fatalf("expected avail cap still 10, got %d", cap(dmp.avail))
	}
	// return the extra
	dmp.Put(extra)
	// check len and cap grown to 11
	if len(dmp.avail) != 11 {
		t.Fatalf("expected 11 available, got %d", len(dmp.avail))
	}
	// cap grows, but not by 1 (append behavior)
	if cap(dmp.avail) <= 10 {
		t.Fatalf("expected avail cap more than 10, got %d", cap(dmp.avail))
	}
}
================================================
FILE: search/query/bool_field.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// BoolFieldQuery matches documents by the value of a boolean field.
type BoolFieldQuery struct {
// Bool is the boolean value to search for.
Bool bool `json:"bool"`
// FieldVal is the field to search; when empty, the mapping's default
// search field is used.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil until SetBoost is called.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewBoolFieldQuery creates a new Query for boolean fields that matches the
// given value. No field or boost is set.
func NewBoolFieldQuery(val bool) *BoolFieldQuery {
	q := new(BoolFieldQuery)
	q.Bool = val
	return q
}
// SetBoost sets the boost of this query to b.
func (q *BoolFieldQuery) SetBoost(b float64) {
	value := Boost(b)
	q.BoostVal = &value
}
// Boost returns the boost of this query, as reported by the Value method of
// BoostVal.
// NOTE(review): BoostVal is nil when SetBoost was never called; this relies
// on (*Boost).Value handling a nil receiver — confirm.
func (q *BoolFieldQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *BoolFieldQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the name of the field this query searches; it is empty when
// no field has been set.
func (q *BoolFieldQuery) Field() string {
return q.FieldVal
}
// Searcher builds a term searcher for this query. The boolean value is
// searched as the term "T" (true) or "F" (false) in the configured field, or
// in the mapping's default search field when no field has been set.
func (q *BoolFieldQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	var term string
	if q.Bool {
		term = "T"
	} else {
		term = "F"
	}

	boost := q.BoostVal.Value()
	return searcher.NewTermSearcher(ctx, i, term, field, boost, options)
}
================================================
FILE: search/query/boolean.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// BooleanQuery is a compound query combining must, should, must-not and
// filter clauses.
type BooleanQuery struct {
// Must holds the queries that all have to match; the Add/New helpers store
// a *ConjunctionQuery here.
Must Query `json:"must,omitempty"`
// Should holds the optional queries; the Add/New helpers store a
// *DisjunctionQuery here.
Should Query `json:"should,omitempty"`
// MustNot holds the queries that must not match; the Add/New helpers store
// a *DisjunctionQuery here.
MustNot Query `json:"must_not,omitempty"`
// Filter is an optional query set through AddFilter.
Filter Query `json:"filter,omitempty"`
// BoostVal is the optional boost; nil until SetBoost is called.
BoostVal *Boost `json:"boost,omitempty"`
// queryStringMode is set by NewBooleanQueryForQueryString and copied to the
// clause queries created by AddMust, AddShould and AddMustNot.
queryStringMode bool
}
// NewBooleanQuery creates a compound Query composed
// of several other Query objects.
// Result documents must satisfy ALL of the
// must Queries.
// Result documents must satisfy NONE of the must not
// Queries.
// Result documents that ALSO satisfy any of the should
// Queries will score higher.
//
// A clause whose slice is empty (or nil) is left unset.
func NewBooleanQuery(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	q := &BooleanQuery{}
	if len(must) != 0 {
		q.Must = NewConjunctionQuery(must)
	}
	if len(should) != 0 {
		q.Should = NewDisjunctionQuery(should)
	}
	if len(mustNot) != 0 {
		q.MustNot = NewDisjunctionQuery(mustNot)
	}
	return q
}
// NewBooleanQueryForQueryString builds a BooleanQuery in query-string mode.
// The clauses are added through AddMust, AddShould and AddMustNot after the
// mode flag is set, so the clause queries they create inherit it.
func NewBooleanQueryForQueryString(must []Query, should []Query, mustNot []Query) *BooleanQuery {
	q := &BooleanQuery{queryStringMode: true}
	q.AddMust(must...)
	q.AddShould(should...)
	q.AddMustNot(mustNot...)
	return q
}
// SetMinShould requires that at least minShould of the
// should Queries must be satisfied.
//
// It panics if Should is unset or is not a *DisjunctionQuery, because the
// type assertion is unchecked; add should clauses before calling it.
func (q *BooleanQuery) SetMinShould(minShould float64) {
	should := q.Should.(*DisjunctionQuery)
	should.SetMin(minShould)
}
// AddMust appends the given queries to the must clause, creating the
// underlying conjunction (inheriting queryStringMode) on first use. Calling
// it with no arguments is a no-op. It panics if queries are added while Must
// holds something other than a *ConjunctionQuery.
func (q *BooleanQuery) AddMust(m ...Query) {
	if m == nil {
		return
	}
	if q.Must == nil {
		conj := NewConjunctionQuery([]Query{})
		conj.queryStringMode = q.queryStringMode
		q.Must = conj
	}
	for _, sub := range m {
		conj := q.Must.(*ConjunctionQuery)
		conj.AddQuery(sub)
	}
}
// AddShould appends the given queries to the should clause, creating the
// underlying disjunction (inheriting queryStringMode) on first use. Calling
// it with no arguments is a no-op. It panics if queries are added while
// Should holds something other than a *DisjunctionQuery.
func (q *BooleanQuery) AddShould(m ...Query) {
	if m == nil {
		return
	}
	if q.Should == nil {
		disj := NewDisjunctionQuery([]Query{})
		disj.queryStringMode = q.queryStringMode
		q.Should = disj
	}
	for _, sub := range m {
		disj := q.Should.(*DisjunctionQuery)
		disj.AddQuery(sub)
	}
}
// AddMustNot appends the given queries to the must-not clause, creating an
// empty disjunction (inheriting queryStringMode) first when no must-not
// clause exists. A call without arguments (nil slice) does nothing.
func (q *BooleanQuery) AddMustNot(m ...Query) {
	if m == nil {
		return
	}
	if q.MustNot == nil {
		dq := NewDisjunctionQuery([]Query{})
		dq.queryStringMode = q.queryStringMode
		q.MustNot = dq
	}
	if len(m) == 0 {
		return
	}
	q.MustNot.(*DisjunctionQuery).AddQuery(m...)
}
// AddFilter sets the filter clause to m, replacing any previous filter.
// A nil m leaves the current filter untouched.
func (q *BooleanQuery) AddFilter(m Query) {
	if m != nil {
		q.Filter = m
	}
}
// SetBoost stores b as the boost of this query.
func (q *BooleanQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *BooleanQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher builds the searcher that evaluates this boolean query.
//
// Each configured clause (must, should, must_not) is turned into its own
// searcher; a clause that resolves to a MatchNoneSearcher is treated as
// absent. The optional filter clause is built with scoring, explain and term
// vectors disabled and applied as a FilterFunc on top of the result.
//
// Searchers that were already built are closed when a later step fails, so
// an error return does not leak them.
//
// NOTE(review): on the success path the filter searcher is only referenced
// by the filter closure; nothing visible here closes it — confirm ownership.
func (q *BooleanQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	var mustNotSearcher, mustSearcher, shouldSearcher, filterSearcher search.Searcher

	// closeBuilt releases every searcher constructed so far. It is only
	// invoked on error paths, where the caller never receives a handle.
	closeBuilt := func() {
		for _, s := range []search.Searcher{mustNotSearcher, mustSearcher, shouldSearcher, filterSearcher} {
			if s != nil {
				_ = s.Close()
			}
		}
	}

	if q.MustNot != nil {
		sr, err := q.MustNot.Searcher(ctx, i, m, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		// if must not is MatchNone, leave it nil
		if _, ok := sr.(*searcher.MatchNoneSearcher); !ok {
			mustNotSearcher = sr
		}
	}

	if q.Must != nil {
		sr, err := q.Must.Searcher(ctx, i, m, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		// if must searcher is MatchNone, leave it nil
		if _, ok := sr.(*searcher.MatchNoneSearcher); !ok {
			mustSearcher = sr
		}
	}

	if q.Should != nil {
		sr, err := q.Should.Searcher(ctx, i, m, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		// if should searcher is MatchNone, leave it nil
		if _, ok := sr.(*searcher.MatchNoneSearcher); !ok {
			shouldSearcher = sr
		}
	}

	var filterFunc searcher.FilterFunc
	if q.Filter != nil {
		// create a new searcher options with disabled scoring, since filter should not affect scoring
		// and we don't want to pay the cost of scoring if we don't need it, also disable term vectors
		// and explain, since we don't need them for filters
		filterOptions := search.SearcherOptions{
			Explain:            false,
			IncludeTermVectors: false,
			Score:              "none",
		}
		sr, err := q.Filter.Searcher(ctx, i, m, filterOptions)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		filterSearcher = sr

		var init bool
		var refDoc *search.DocumentMatch
		filterFunc = func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
			// Initialize the reference document to point
			// to the first document in the filterSearcher
			var err error
			if !init {
				refDoc, err = filterSearcher.Next(sctx)
				if err != nil {
					return false
				}
				init = true
			}
			if refDoc == nil {
				// filterSearcher is exhausted, d is not in filter
				return false
			}
			// Compare document IDs
			cmp := refDoc.IndexInternalID.Compare(d.IndexInternalID)
			if cmp < 0 {
				// recycle refDoc now that we do not need it
				sctx.DocumentMatchPool.Put(refDoc)
				// filterSearcher is behind the current document, Advance() it
				refDoc, err = filterSearcher.Advance(sctx, d.IndexInternalID)
				if err != nil || refDoc == nil {
					return false
				}
				// After advance, check if they're now equal
				cmp = refDoc.IndexInternalID.Compare(d.IndexInternalID)
			}
			// cmp >= 0: either equal (match) or filterSearcher is ahead (no match)
			return cmp == 0
		}
	}

	// if all 4 are nil, return MatchNone
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil && filterFunc == nil {
		return searcher.NewMatchNoneSearcher(i)
	}

	// optimization, if only must searcher, just return it instead
	if mustSearcher != nil && shouldSearcher == nil && mustNotSearcher == nil && filterFunc == nil {
		return mustSearcher, nil
	}

	// optimization, if only should searcher, just return it instead
	if mustSearcher == nil && shouldSearcher != nil && mustNotSearcher == nil && filterFunc == nil {
		return shouldSearcher, nil
	}

	// optimization, if only filter searcher, wrap around a MatchAllSearcher
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher == nil && filterFunc != nil {
		// keep the result in a local so a failed construction never ends up
		// in the set handled by closeBuilt
		matchAll, err := searcher.NewMatchAllSearcher(ctx, i, 1.0, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		return searcher.NewFilteringSearcher(ctx,
			matchAll,
			filterFunc,
		), nil
	}

	// if only mustNotSearcher, start with MatchAll
	if mustSearcher == nil && shouldSearcher == nil && mustNotSearcher != nil {
		matchAll, err := searcher.NewMatchAllSearcher(ctx, i, 1.0, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		mustSearcher = matchAll
	}

	bs, err := searcher.NewBooleanSearcher(ctx, i, mustSearcher, shouldSearcher, mustNotSearcher, options)
	if err != nil {
		closeBuilt()
		return nil, err
	}

	if filterFunc != nil {
		return searcher.NewFilteringSearcher(ctx, bs, filterFunc), nil
	}
	return bs, nil
}
// Validate validates every clause that implements ValidatableQuery (in the
// order must, should, must_not, filter) and returns the first error found.
// A query without any clause at all is rejected.
func (q *BooleanQuery) Validate() error {
	for _, clause := range []Query{q.Must, q.Should, q.MustNot, q.Filter} {
		vq, ok := clause.(ValidatableQuery)
		if !ok {
			// unset clause, or one that has nothing to validate
			continue
		}
		if err := vq.Validate(); err != nil {
			return err
		}
	}
	if q.Must == nil && q.Should == nil && q.MustNot == nil && q.Filter == nil {
		return fmt.Errorf("boolean query must contain at least one must or should or not must or filter clause")
	}
	return nil
}
// UnmarshalJSON decodes a boolean query. Each clause present in the input is
// parsed with ParseQuery; must has to decode to a conjunction and
// should/must_not to a disjunction, otherwise an error is returned. The
// filter clause may be any query. Clauses absent from the input are left as
// they were on q.
func (q *BooleanQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Must    json.RawMessage `json:"must,omitempty"`
		Should  json.RawMessage `json:"should,omitempty"`
		MustNot json.RawMessage `json:"must_not,omitempty"`
		Filter  json.RawMessage `json:"filter,omitempty"`
		Boost   *Boost          `json:"boost,omitempty"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	var err error
	if raw.Must != nil {
		if q.Must, err = ParseQuery(raw.Must); err != nil {
			return err
		}
		if _, ok := q.Must.(*ConjunctionQuery); !ok {
			return fmt.Errorf("must clause must be conjunction")
		}
	}
	if raw.Should != nil {
		if q.Should, err = ParseQuery(raw.Should); err != nil {
			return err
		}
		if _, ok := q.Should.(*DisjunctionQuery); !ok {
			return fmt.Errorf("should clause must be disjunction")
		}
	}
	if raw.MustNot != nil {
		if q.MustNot, err = ParseQuery(raw.MustNot); err != nil {
			return err
		}
		if _, ok := q.MustNot.(*DisjunctionQuery); !ok {
			return fmt.Errorf("must not clause must be disjunction")
		}
	}
	if raw.Filter != nil {
		if q.Filter, err = ParseQuery(raw.Filter); err != nil {
			return err
		}
	}

	q.BoostVal = raw.Boost
	return nil
}
================================================
FILE: search/query/boost.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import "fmt"
// Boost is the boost factor attached to a query. Queries hold it as a
// *Boost, where nil means no boost was specified.
type Boost float64

// Value returns the boost as a float64, or 1.0 when b is nil.
func (b *Boost) Value() float64 {
	if b != nil {
		return float64(*b)
	}
	return 1.0
}

// GoString renders the boost with fmt's %f verb, or a fixed placeholder
// text when b is nil.
func (b *Boost) GoString() string {
	if b != nil {
		return fmt.Sprintf("%f", float64(*b))
	}
	return "boost unspecified"
}
================================================
FILE: search/query/conjunction.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// ConjunctionQuery is a compound query whose result documents must satisfy
// all of its sub-queries.
type ConjunctionQuery struct {
// Conjuncts are the sub-queries.
Conjuncts []Query `json:"conjuncts"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// queryStringMode makes Searcher skip sub-queries that resolve to a
// MatchNoneSearcher.
queryStringMode bool
}
// NewConjunctionQuery returns a compound Query over conjuncts.
// Result documents must satisfy all of the queries.
func NewConjunctionQuery(conjuncts []Query) *ConjunctionQuery {
	cq := new(ConjunctionQuery)
	cq.Conjuncts = conjuncts
	return cq
}
// SetBoost stores b as the boost of this query.
func (q *ConjunctionQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *ConjunctionQuery) Boost() float64 {
return q.BoostVal.Value()
}
// AddQuery appends the given queries to the list of conjuncts.
func (q *ConjunctionQuery) AddQuery(aq ...Query) {
q.Conjuncts = append(q.Conjuncts, aq...)
}
// Searcher builds one searcher per conjunct and combines them.
//
// In query string mode, conjuncts resolving to a MatchNoneSearcher are
// skipped. When no searcher remains a MatchNoneSearcher is returned. If the
// context requests nested search and the mapping implements
// mapping.NestedMapping, the fields used by the conjuncts are collected and
// a nested conjunction searcher is used whenever the common nested depth is
// smaller than the maximum depth. Searchers built so far are closed if
// building a later conjunct fails.
func (q *ConjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	built := make([]search.Searcher, 0, len(q.Conjuncts))
	// closeBuilt releases the searchers collected so far (error paths only)
	closeBuilt := func() {
		for _, s := range built {
			if s != nil {
				_ = s.Close()
			}
		}
	}

	// nested handling needs both the context flag and a nested mapping;
	// without the latter we shouldn't be in nested mode at all
	var nm mapping.NestedMapping
	nestedMode, _ := ctx.Value(search.NestedSearchKey).(bool)
	if nestedMode {
		var isNested bool
		nm, isNested = m.(mapping.NestedMapping)
		nestedMode = isNested
	}

	// set of fields used in this query (only gathered in nested mode)
	var fields search.FieldSet
	for _, conjunct := range q.Conjuncts {
		if nestedMode {
			var err error
			if fields, err = ExtractFields(conjunct, m, fields); err != nil {
				closeBuilt()
				return nil, err
			}
		}

		sub, err := conjunct.Searcher(ctx, i, m, options)
		if err != nil {
			closeBuilt()
			return nil, err
		}
		if q.queryStringMode {
			// in query string mode, skip match none
			if _, isNone := sub.(*searcher.MatchNoneSearcher); isNone {
				continue
			}
		}
		built = append(built, sub)
	}

	if len(built) == 0 {
		return searcher.NewMatchNoneSearcher(i)
	}

	if nestedMode {
		// nested depth info for the query fields
		commonDepth, maxDepth := nm.NestedDepth(fields)
		// querying the _all or _id fields forces common depth 0, meaning
		// matches happen only at the root level
		if fields.HasAll() || fields.HasID() {
			commonDepth = 0
		}
		// equal depths mean all fields share the same nested context and
		// the normal conjunction searcher is enough; otherwise the nested
		// conjunction searcher is required
		if commonDepth < maxDepth {
			return searcher.NewNestedConjunctionSearcher(ctx, i, built, commonDepth, options)
		}
	}

	return searcher.NewConjunctionSearcher(ctx, i, built, options)
}
// Validate validates every conjunct that implements ValidatableQuery and
// returns the first error encountered, or nil.
func (q *ConjunctionQuery) Validate() error {
	for _, conjunct := range q.Conjuncts {
		vq, ok := conjunct.(ValidatableQuery)
		if !ok {
			continue
		}
		if err := vq.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// UnmarshalJSON decodes a conjunction query, parsing every entry of
// "conjuncts" with ParseQuery and copying the optional boost.
func (q *ConjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Conjuncts []json.RawMessage `json:"conjuncts"`
		Boost     *Boost            `json:"boost,omitempty"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	q.Conjuncts = make([]Query, len(raw.Conjuncts))
	for idx := range raw.Conjuncts {
		parsed, err := ParseQuery(raw.Conjuncts[idx])
		if err != nil {
			return err
		}
		q.Conjuncts[idx] = parsed
	}
	q.BoostVal = raw.Boost
	return nil
}
================================================
FILE: search/query/date_range.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"math"
"time"
"github.com/blevesearch/bleve/v2/analysis/datetime/optional"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
var (
	// QueryDateTimeParser controls the default query date time parser.
	QueryDateTimeParser = optional.Name

	// QueryDateTimeFormat controls the format when Marshaling to JSON.
	QueryDateTimeFormat = time.RFC3339

	// cache is the registry cache used in this file to look up date time
	// parsers by name.
	cache = registry.NewCache()
)
// BleveQueryTime wraps time.Time so that JSON encoding uses
// QueryDateTimeFormat and JSON decoding uses the parser named by
// QueryDateTimeParser (see its MarshalJSON/UnmarshalJSON methods).
type BleveQueryTime struct {
time.Time
}
// MinRFC3339CompatibleTime and MaxRFC3339CompatibleTime are the earliest and
// latest times accepted by the date range checks in this package. Both are
// assigned in init.
var (
	MinRFC3339CompatibleTime time.Time
	MaxRFC3339CompatibleTime time.Time
)

// init sets the bounds to 1677-12-01T00:00:00Z and 2262-04-11T11:59:59Z.
func init() {
	MinRFC3339CompatibleTime = time.Date(1677, time.December, 1, 0, 0, 0, 0, time.UTC)
	MaxRFC3339CompatibleTime = time.Date(2262, time.April, 11, 11, 59, 59, 0, time.UTC)
}
// queryTimeFromString parses t with the date time parser named by
// QueryDateTimeParser. The zero time is returned together with any error.
func queryTimeFromString(t string) (time.Time, error) {
	var zero time.Time

	parser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
	if err != nil {
		return zero, err
	}
	parsed, _, err := parser.ParseDateTime(t)
	if err != nil {
		return zero, err
	}
	return parsed, nil
}
// MarshalJSON encodes the time as a double-quoted string formatted with
// QueryDateTimeFormat. The formatted text is emitted as is, without JSON
// escaping.
func (t *BleveQueryTime) MarshalJSON() ([]byte, error) {
	formatted := t.Time.Format(QueryDateTimeFormat)
	out := make([]byte, 0, len(formatted)+2)
	out = append(out, '"')
	out = append(out, formatted...)
	out = append(out, '"')
	return out, nil
}
// UnmarshalJSON decodes a JSON string and parses it with the date time
// parser named by QueryDateTimeParser, storing the result in t.Time.
func (t *BleveQueryTime) UnmarshalJSON(data []byte) error {
	var raw string
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}
	parser, err := cache.DateTimeParserNamed(QueryDateTimeParser)
	if err != nil {
		return err
	}
	t.Time, _, err = parser.ParseDateTime(raw)
	return err
}
// DateRangeQuery matches documents whose date field falls within a range
// given as time values.
type DateRangeQuery struct {
// Start is the lower endpoint; a zero time leaves the range open below.
Start BleveQueryTime `json:"start,omitempty"`
// End is the upper endpoint; a zero time leaves the range open above.
End BleveQueryTime `json:"end,omitempty"`
// InclusiveStart and InclusiveEnd are handed unchanged to the numeric
// range searcher; presumably nil selects that searcher's default —
// TODO confirm.
InclusiveStart *bool `json:"inclusive_start,omitempty"`
InclusiveEnd *bool `json:"inclusive_end,omitempty"`
// FieldVal is the field to search; when empty, Searcher uses the
// mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewDateRangeQuery creates a new Query for ranges of date values with the
// given endpoints and no explicit inclusiveness settings.
// A zero start or end leaves that side of the range open; Validate rejects
// a query where both are zero.
func NewDateRangeQuery(start, end time.Time) *DateRangeQuery {
	return &DateRangeQuery{
		Start: BleveQueryTime{start},
		End:   BleveQueryTime{end},
	}
}
// NewDateRangeInclusiveQuery creates a new Query for ranges of date values.
// A zero start or end leaves that side of the range open; Validate rejects
// a query where both are zero.
// startInclusive and endInclusive control inclusion of the endpoints.
func NewDateRangeInclusiveQuery(start, end time.Time, startInclusive, endInclusive *bool) *DateRangeQuery {
	q := new(DateRangeQuery)
	q.Start.Time = start
	q.End.Time = end
	q.InclusiveStart = startInclusive
	q.InclusiveEnd = endInclusive
	return q
}
// SetBoost stores b as the boost of this query.
func (q *DateRangeQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *DateRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *DateRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the configured field name (empty when none was set).
func (q *DateRangeQuery) Field() string {
return q.FieldVal
}
// Searcher converts the endpoints to their numeric form and returns a
// numeric range searcher over the configured field, falling back to the
// mapping's default search field when none is set.
func (q *DateRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	lower, upper, err := q.parseEndpoints()
	if err != nil {
		return nil, err
	}

	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	return searcher.NewNumericRangeSearcher(ctx, i, lower, upper, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
}
// parseEndpoints converts Start and End into the float64 encoding of their
// UnixNano values. A zero endpoint maps to -Inf (start) or +Inf (end). An
// endpoint rejected by isDatetimeCompatible yields an error.
func (q *DateRangeQuery) parseEndpoints() (*float64, *float64, error) {
	lower, upper := math.Inf(-1), math.Inf(1)

	if !q.Start.IsZero() {
		if !isDatetimeCompatible(q.Start) {
			// overflow
			return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
		}
		lower = numeric.Int64ToFloat64(q.Start.UnixNano())
	}

	if !q.End.IsZero() {
		if !isDatetimeCompatible(q.End) {
			// overflow
			return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
		}
		upper = numeric.Int64ToFloat64(q.End.UnixNano())
	}

	return &lower, &upper, nil
}
// Validate requires at least one non-zero endpoint and that both endpoints
// can be converted by parseEndpoints.
func (q *DateRangeQuery) Validate() error {
	if q.Start.IsZero() && q.End.IsZero() {
		return fmt.Errorf("must specify start or end")
	}
	_, _, err := q.parseEndpoints()
	return err
}
// isDatetimeCompatible reports whether t is acceptable as a range endpoint.
// The bounds check against MinRFC3339CompatibleTime and
// MaxRFC3339CompatibleTime is applied only while QueryDateTimeFormat is
// time.RFC3339; with any other format every time is accepted.
func isDatetimeCompatible(t BleveQueryTime) bool {
	if QueryDateTimeFormat != time.RFC3339 {
		return true
	}
	return !t.Before(MinRFC3339CompatibleTime) && !t.After(MaxRFC3339CompatibleTime)
}
================================================
FILE: search/query/date_range_string.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"math"
"time"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// DateRangeStringQuery represents a query for a range of date values.
// Start and End are the range endpoints, as strings.
// Start and End are parsed using DateTimeParser, which is a custom date time parser
// defined in the index mapping. If DateTimeParser is not specified, then the
// top-level config.QueryDateTimeParser is used.
type DateRangeStringQuery struct {
// Start and End are the unparsed endpoints; an empty string means that
// endpoint is not specified.
Start string `json:"start,omitempty"`
End string `json:"end,omitempty"`
// InclusiveStart and InclusiveEnd are handed unchanged to the numeric
// range searcher; presumably nil selects that searcher's default —
// TODO confirm.
InclusiveStart *bool `json:"inclusive_start,omitempty"`
InclusiveEnd *bool `json:"inclusive_end,omitempty"`
// FieldVal is the field to search; when empty, Searcher uses the
// mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// DateTimeParser names the parser looked up in the index mapping; when
// empty, QueryDateTimeParser is used.
DateTimeParser string `json:"datetime_parser,omitempty"`
}
// NewDateRangeStringQuery creates a new Query for ranges of date values
// given as strings, with no explicit inclusiveness settings.
// The strings are parsed at search time using the parser named by the
// query's DateTimeParser field (a date time parser defined in the index
// mapping), or QueryDateTimeParser when that field is empty.
// Either endpoint, but not both, may be left empty.
func NewDateRangeStringQuery(start, end string) *DateRangeStringQuery {
	return &DateRangeStringQuery{
		Start: start,
		End:   end,
	}
}
// NewDateRangeStringInclusiveQuery creates a new Query for ranges of date
// values given as strings.
// The strings are parsed at search time using the parser named by the
// query's DateTimeParser field (a date time parser defined in the index
// mapping), or QueryDateTimeParser when that field is empty.
// Either endpoint, but not both, may be left empty.
// startInclusive and endInclusive control inclusion of the endpoints.
func NewDateRangeStringInclusiveQuery(start, end string, startInclusive, endInclusive *bool) *DateRangeStringQuery {
	q := new(DateRangeStringQuery)
	q.Start, q.End = start, end
	q.InclusiveStart, q.InclusiveEnd = startInclusive, endInclusive
	return q
}
// SetBoost stores b as the boost of this query.
func (q *DateRangeStringQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *DateRangeStringQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *DateRangeStringQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the configured field name (empty when none was set).
func (q *DateRangeStringQuery) Field() string {
return q.FieldVal
}
// SetDateTimeParser sets the name of the date time parser used to parse
// Start and End.
func (q *DateRangeStringQuery) SetDateTimeParser(d string) {
q.DateTimeParser = d
}
// DateTimeParserName returns the configured date time parser name (empty
// when none was set).
func (q *DateRangeStringQuery) DateTimeParserName() string {
return q.DateTimeParser
}
// Searcher parses Start and End with the configured date time parser and
// returns a numeric range searcher over the resulting interval.
//
// The parser is looked up in the index mapping under q.DateTimeParser, or
// under QueryDateTimeParser when that is empty; an unknown parser name is an
// error. An empty Start or End is not parsed and leaves that side of the
// range open. Parse errors are wrapped (message unchanged) so callers can
// still inspect the underlying error with errors.Is / errors.As. When no
// field is set, the mapping's default search field is used.
func (q *DateRangeStringQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if q.FieldVal == "" {
		field = m.DefaultSearchField()
	}

	dateTimeParserName := QueryDateTimeParser
	if q.DateTimeParser != "" {
		dateTimeParserName = q.DateTimeParser
	}
	dateTimeParser := m.DateTimeParserNamed(dateTimeParserName)
	if dateTimeParser == nil {
		return nil, fmt.Errorf("no dateTimeParser named '%s' registered", dateTimeParserName)
	}

	var startTime, endTime time.Time
	var err error
	if q.Start != "" {
		startTime, _, err = dateTimeParser.ParseDateTime(q.Start)
		if err != nil {
			return nil, fmt.Errorf("%w, date time parser name: %s", err, dateTimeParserName)
		}
	}
	if q.End != "" {
		endTime, _, err = dateTimeParser.ParseDateTime(q.End)
		if err != nil {
			return nil, fmt.Errorf("%w, date time parser name: %s", err, dateTimeParserName)
		}
	}

	lower, upper, err := q.parseEndpoints(startTime, endTime)
	if err != nil {
		return nil, err
	}
	return searcher.NewNumericRangeSearcher(ctx, i, lower, upper, q.InclusiveStart, q.InclusiveEnd, field, q.BoostVal.Value(), options)
}
// parseEndpoints converts the parsed endpoints into the float64 encoding of
// their UnixNano values. A zero endpoint maps to -Inf (start) or +Inf (end);
// two zero endpoints are an error, as is an endpoint rejected by
// isDateTimeWithinRange. Error messages quote the original strings.
func (q *DateRangeStringQuery) parseEndpoints(startTime, endTime time.Time) (*float64, *float64, error) {
	haveStart, haveEnd := !startTime.IsZero(), !endTime.IsZero()
	if !haveStart && !haveEnd {
		return nil, nil, fmt.Errorf("date range query must specify at least one of start/end")
	}

	lower, upper := math.Inf(-1), math.Inf(1)
	if haveStart {
		if !isDateTimeWithinRange(startTime) {
			// overflow
			return nil, nil, fmt.Errorf("invalid/unsupported date range, start: %v", q.Start)
		}
		lower = numeric.Int64ToFloat64(startTime.UnixNano())
	}
	if haveEnd {
		if !isDateTimeWithinRange(endTime) {
			// overflow
			return nil, nil, fmt.Errorf("invalid/unsupported date range, end: %v", q.End)
		}
		upper = numeric.Int64ToFloat64(endTime.UnixNano())
	}

	return &lower, &upper, nil
}
// Validate checks that at least one of Start and End is specified. The
// strings themselves are not parsed here.
func (q *DateRangeStringQuery) Validate() error {
	if q.Start != "" || q.End != "" {
		return nil
	}
	return fmt.Errorf("date range query must specify at least one of start/end")
}
// isDateTimeWithinRange reports whether t lies between
// MinRFC3339CompatibleTime and MaxRFC3339CompatibleTime, bounds included.
func isDateTimeWithinRange(t time.Time) bool {
	return !t.Before(MinRFC3339CompatibleTime) && !t.After(MaxRFC3339CompatibleTime)
}
================================================
FILE: search/query/date_range_test.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"encoding/json"
"testing"
"time"
)
// TestBleveQueryTime checks that a BleveQueryTime survives a JSON
// marshal/unmarshal round trip (compared at RFC3339 precision), for both the
// current time and the zero time.
func TestBleveQueryTime(t *testing.T) {
	testTimes := []time.Time{
		time.Now(),
		{},
	}

	for i, testTime := range testTimes {
		bqt := &BleveQueryTime{testTime}

		buf, err := json.Marshal(bqt)
		if err != nil {
			// nothing meaningful to decode; report and move to the next case
			t.Errorf("test %d - expected no marshal err, got: %v", i, err)
			continue
		}

		var bqt2 BleveQueryTime
		if err = json.Unmarshal(buf, &bqt2); err != nil {
			// bqt2 is not trustworthy after a failed decode
			t.Errorf("test %d - expected no unmarshal err, got: %v", i, err)
			continue
		}

		if bqt.Time.Format(time.RFC3339) != bqt2.Time.Format(time.RFC3339) {
			t.Errorf("test %d - expected same time, %#v != %#v", i, bqt.Time, bqt2.Time)
		}

		if testTime.Format(time.RFC3339) != bqt2.Time.Format(time.RFC3339) {
			t.Errorf("test %d - expected orig time, %#v != %#v", i, testTime, bqt2.Time)
		}
	}
}
// TestValidateDatetimeRanges runs DateRangeQuery.Validate over a table of
// start/end pairs and checks whether each pair is accepted.
func TestValidateDatetimeRanges(t *testing.T) {
	cases := []struct {
		start  string
		end    string
		expect bool
	}{
		{start: "2019-03-22T13:25:00Z", end: "2019-03-22T18:25:00Z", expect: true},
		{start: "2019-03-22T13:25:00Z", end: "9999-03-22T13:25:00Z", expect: false},
		{start: "2019-03-22T13:25:00Z", end: "2262-04-11T11:59:59Z", expect: true},
		{start: "2019-03-22T13:25:00Z", end: "2262-04-12T00:00:00Z", expect: false},
		{start: "1950-03-22T12:23:23Z", end: "1960-02-21T15:23:34Z", expect: true},
		{start: "0001-01-01T00:00:00Z", end: "0001-01-01T00:00:00Z", expect: false},
		{start: "0001-01-01T00:00:00Z", end: "2000-01-01T00:00:00Z", expect: true},
		{start: "1677-11-30T11:59:59Z", end: "2262-04-11T11:59:59Z", expect: false},
		{start: "2262-04-12T00:00:00Z", end: "2262-04-11T11:59:59Z", expect: false},
		{start: "1677-12-01T00:00:00Z", end: "2262-04-12T00:00:00Z", expect: false},
		{start: "1677-12-01T00:00:00Z", end: "1677-11-30T11:59:59Z", expect: false},
		{start: "1677-12-01T00:00:00Z", end: "2262-04-11T11:59:59Z", expect: true},
	}

	for _, tc := range cases {
		// parse errors are ignored: every literal in the table is
		// well-formed RFC3339
		startTime, _ := time.Parse(time.RFC3339, tc.start)
		endTime, _ := time.Parse(time.RFC3339, tc.end)

		valid := NewDateRangeQuery(startTime, endTime).Validate() == nil
		if valid != tc.expect {
			t.Errorf("unexpected results while validating date range query with"+
				" {start: %v, end: %v}, expected: %v",
				tc.start, tc.end, tc.expect)
		}
	}
}
================================================
FILE: search/query/disjunction.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// DisjunctionQuery is a compound query whose result documents satisfy at
// least one of its sub-queries.
type DisjunctionQuery struct {
// Disjuncts are the sub-queries.
Disjuncts []Query `json:"disjuncts"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// Min is passed to the disjunction searcher as the minimum; Validate
// rejects a Min whose integer part exceeds the number of disjuncts.
Min float64 `json:"min"`
// retrieveScoreBreakdown is placed in the search context under
// search.IncludeScoreBreakdownKey by Searcher.
retrieveScoreBreakdown bool
// queryStringMode makes Searcher skip sub-queries that resolve to a
// MatchNoneSearcher.
queryStringMode bool
}
// RetrieveScoreBreakdown records whether Searcher should request a score
// breakdown (the flag is stored in the context under
// search.IncludeScoreBreakdownKey).
func (q *DisjunctionQuery) RetrieveScoreBreakdown(b bool) {
q.retrieveScoreBreakdown = b
}
// NewDisjunctionQuery returns a compound Query over disjuncts.
// Result documents satisfy at least one Query.
func NewDisjunctionQuery(disjuncts []Query) *DisjunctionQuery {
	dq := new(DisjunctionQuery)
	dq.Disjuncts = disjuncts
	return dq
}
// SetBoost stores b as the boost of this query.
func (q *DisjunctionQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *DisjunctionQuery) Boost() float64 {
return q.BoostVal.Value()
}
// AddQuery appends the given queries to the list of disjuncts.
func (q *DisjunctionQuery) AddQuery(aq ...Query) {
q.Disjuncts = append(q.Disjuncts, aq...)
}
// SetMin sets the minimum handed to the disjunction searcher.
func (q *DisjunctionQuery) SetMin(m float64) {
q.Min = m
}
// Searcher builds one searcher per disjunct and combines them into a
// disjunction searcher using q.Min.
//
// Nil searchers are dropped, and in query string mode so are searchers of
// type MatchNoneSearcher. When nothing remains a MatchNoneSearcher is
// returned. Searchers built so far are closed if building a later disjunct
// fails. The score breakdown flag travels to the disjunction searcher via
// the context.
func (q *DisjunctionQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions,
) (search.Searcher, error) {
	built := make([]search.Searcher, 0, len(q.Disjuncts))
	for _, disjunct := range q.Disjuncts {
		sub, err := disjunct.Searcher(ctx, i, m, options)
		if err != nil {
			// release what was built before bailing out
			for _, s := range built {
				if s != nil {
					_ = s.Close()
				}
			}
			return nil, err
		}
		if sub == nil {
			continue
		}
		if _, isNone := sub.(*searcher.MatchNoneSearcher); isNone && q.queryStringMode {
			// in query string mode, skip match none
			continue
		}
		built = append(built, sub)
	}

	if len(built) == 0 {
		return searcher.NewMatchNoneSearcher(i)
	}

	nctx := context.WithValue(ctx, search.IncludeScoreBreakdownKey, q.retrieveScoreBreakdown)
	return searcher.NewDisjunctionSearcher(nctx, i, built, q.Min, options)
}
// Validate rejects a query whose Min (truncated to an int) exceeds the
// number of disjuncts, then validates every disjunct that implements
// ValidatableQuery, returning the first error.
func (q *DisjunctionQuery) Validate() error {
	if len(q.Disjuncts) < int(q.Min) {
		return fmt.Errorf("disjunction query has fewer than the minimum number of clauses to satisfy")
	}
	for _, disjunct := range q.Disjuncts {
		vq, ok := disjunct.(ValidatableQuery)
		if !ok {
			continue
		}
		if err := vq.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// UnmarshalJSON decodes a disjunction query, parsing every entry of
// "disjuncts" with ParseQuery and copying the optional boost and the min.
func (q *DisjunctionQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Disjuncts []json.RawMessage `json:"disjuncts"`
		Boost     *Boost            `json:"boost,omitempty"`
		Min       float64           `json:"min"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	q.Disjuncts = make([]Query, len(raw.Disjuncts))
	for idx := range raw.Disjuncts {
		parsed, err := ParseQuery(raw.Disjuncts[idx])
		if err != nil {
			return err
		}
		q.Disjuncts[idx] = parsed
	}
	q.BoostVal = raw.Boost
	q.Min = raw.Min
	return nil
}
================================================
FILE: search/query/docid.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// DocIDQuery matches indexed documents by their IDs.
type DocIDQuery struct {
// IDs is the set of document IDs to return.
IDs []string `json:"ids"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewDocIDQuery creates a new Query object returning indexed documents among
// the specified set. Combine it with ConjunctionQuery to restrict the scope
// of other queries' output.
func NewDocIDQuery(ids []string) *DocIDQuery {
	dq := new(DocIDQuery)
	dq.IDs = ids
	return dq
}
// SetBoost stores b as the boost of this query.
func (q *DocIDQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *DocIDQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Searcher returns a doc ID searcher over q.IDs with the query's boost.
// The index mapping m is not used.
func (q *DocIDQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewDocIDSearcher(ctx, i, q.IDs, q.BoostVal.Value(), options)
}
================================================
FILE: search/query/fuzzy.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// FuzzyQuery finds documents containing terms within a given fuzziness of
// Term.
type FuzzyQuery struct {
// Term is the term to match approximately.
Term string `json:"term"`
// Prefix is handed to the fuzzy searcher as the prefix length.
Prefix int `json:"prefix_length"`
// Fuzziness is the fuzziness handed to the fuzzy searcher; it is not
// passed along when autoFuzzy is set.
Fuzziness int `json:"fuzziness"`
// FieldVal is the field to search; when empty, Searcher uses the
// mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; nil means unspecified.
BoostVal *Boost `json:"boost,omitempty"`
// autoFuzzy selects the auto-fuzzy searcher; in JSON it is represented
// as "fuzziness": "auto".
autoFuzzy bool
}
// NewFuzzyQuery creates a new Query which finds documents containing terms
// within a specific fuzziness of the specified term.
// The default fuzziness is 1.
//
// The current implementation uses Levenshtein edit distance as the
// fuzziness metric.
func NewFuzzyQuery(term string) *FuzzyQuery {
	fq := new(FuzzyQuery)
	fq.Term = term
	fq.Fuzziness = 1
	return fq
}
// SetBoost stores b as the boost of this query.
func (q *FuzzyQuery) SetBoost(b float64) {
	val := Boost(b)
	q.BoostVal = &val
}
// Boost returns the boost of this query as reported by BoostVal.Value().
func (q *FuzzyQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the name of the field this query searches.
func (q *FuzzyQuery) SetField(f string) {
q.FieldVal = f
}
func (q *FuzzyQuery) Field() string {
return q.FieldVal
}
func (q *FuzzyQuery) SetFuzziness(f int) {
q.Fuzziness = f
}
func (q *FuzzyQuery) SetAutoFuzziness(a bool) {
q.autoFuzzy = a
}
func (q *FuzzyQuery) SetPrefix(p int) {
q.Prefix = p
}
// Searcher builds the searcher for this query. An empty FieldVal is
// replaced by the mapping's default search field. When auto fuzziness
// is enabled the auto-fuzzy searcher is used and Fuzziness is not
// consulted; otherwise the regular fuzzy searcher gets Fuzziness.
func (q *FuzzyQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}
	if !q.autoFuzzy {
		return searcher.NewFuzzySearcher(ctx, i, q.Term, q.Prefix, q.Fuzziness, target, q.BoostVal.Value(), options)
	}
	return searcher.NewAutoFuzzySearcher(ctx, i, q.Term, q.Prefix, target, q.BoostVal.Value(), options)
}
// UnmarshalJSON decodes a fuzzy query. The "fuzziness" key may hold a
// number, which becomes Fuzziness, or the string "auto", which enables
// auto fuzziness. Any other value for that key is left unapplied.
func (q *FuzzyQuery) UnmarshalJSON(data []byte) error {
	// Alias has FuzzyQuery's fields but not its methods, so decoding
	// into it does not recurse into this function.
	type Alias FuzzyQuery
	aux := struct {
		// Shadows Alias.Fuzziness so either JSON type can be captured.
		Fuzziness interface{} `json:"fuzziness"`
		*Alias
	}{
		Alias: (*Alias)(q),
	}
	if err := util.UnmarshalJSON(data, &aux); err != nil {
		return err
	}
	if num, isNum := aux.Fuzziness.(float64); isNum {
		q.Fuzziness = int(num)
	} else if str, isStr := aux.Fuzziness.(string); isStr && str == "auto" {
		q.autoFuzzy = true
	}
	return nil
}
// MarshalJSON encodes the query. The "fuzziness" key carries the string
// "auto" when auto fuzziness is enabled and the numeric Fuzziness
// otherwise.
func (q *FuzzyQuery) MarshalJSON() ([]byte, error) {
	var fuzziness interface{} = q.Fuzziness
	if q.autoFuzzy {
		fuzziness = "auto"
	}
	return util.MarshalJSON(struct {
		Term      string      `json:"term"`
		Prefix    int         `json:"prefix_length"`
		Fuzziness interface{} `json:"fuzziness"`
		FieldVal  string      `json:"field,omitempty"`
		BoostVal  *Boost      `json:"boost,omitempty"`
	}{
		Term:      q.Term,
		Prefix:    q.Prefix,
		Fuzziness: fuzziness,
		FieldVal:  q.FieldVal,
		BoostVal:  q.BoostVal,
	})
}
================================================
FILE: search/query/geo_boundingbox.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// GeoBoundingBoxQuery describes a search restricted to a rectangle given
// by its top-left and bottom-right corners.
type GeoBoundingBoxQuery struct {
// TopLeft is the top-left corner stored as {lon, lat}.
TopLeft []float64 `json:"top_left,omitempty"`
// BottomRight is the bottom-right corner stored as {lon, lat}.
BottomRight []float64 `json:"bottom_right,omitempty"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoBoundingBoxQuery builds a bounding-box query from the longitude
// and latitude of the top-left and bottom-right corners. Each corner is
// stored as a {lon, lat} pair.
func NewGeoBoundingBoxQuery(topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64) *GeoBoundingBoxQuery {
	q := new(GeoBoundingBoxQuery)
	q.TopLeft = []float64{topLeftLon, topLeftLat}
	q.BottomRight = []float64{bottomRightLon, bottomRightLat}
	return q
}
// SetBoost stores b as the query's boost.
func (q *GeoBoundingBoxQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *GeoBoundingBoxQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *GeoBoundingBoxQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field this query searches; it may be empty.
func (q *GeoBoundingBoxQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the searcher for this bounding box. An empty FieldVal
// is replaced by the mapping's default search field, and the context is
// tagged with the Geo query type.
//
// When the bottom-right longitude is smaller than the top-left
// longitude the box crosses the date line; it is then split into one
// box ending at the bottom-right longitude (starting at -180) and one
// starting at the top-left longitude (ending at 180), combined in a
// disjunction searcher.
func (q *GeoBoundingBoxQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}
	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)

	rightLon, leftLon := q.BottomRight[0], q.TopLeft[0]
	bottomLat, topLat := q.BottomRight[1], q.TopLeft[1]

	if rightLon < leftLon {
		// cross date line, rewrite as two parts
		west, err := searcher.NewGeoBoundingBoxSearcher(ctx, i, -180, bottomLat, rightLon, topLat, target, q.BoostVal.Value(), options, true)
		if err != nil {
			return nil, err
		}
		east, err := searcher.NewGeoBoundingBoxSearcher(ctx, i, leftLon, bottomLat, 180, topLat, target, q.BoostVal.Value(), options, true)
		if err != nil {
			// Release the half that was already built.
			_ = west.Close()
			return nil, err
		}
		parts := []search.Searcher{west, east}
		return searcher.NewDisjunctionSearcher(ctx, i, parts, 0, options)
	}

	return searcher.NewGeoBoundingBoxSearcher(ctx, i, leftLon, bottomLat, rightLon, topLat, target, q.BoostVal.Value(), options, true)
}
// Validate checks that the query describes a usable bounding box.
//
// Both corners must hold at least a longitude and a latitude; a query
// built as a struct literal or left at its zero value would otherwise
// cause an index-out-of-range panic here. The top-left latitude must
// also not be below the bottom-right latitude.
func (q *GeoBoundingBoxQuery) Validate() error {
	if len(q.TopLeft) < 2 {
		return fmt.Errorf("geo bounding box top left must have a longitude and a latitude")
	}
	if len(q.BottomRight) < 2 {
		return fmt.Errorf("geo bounding box bottom right must have a longitude and a latitude")
	}
	if q.TopLeft[1] < q.BottomRight[1] {
		return fmt.Errorf("geo bounding box top left should be higher than bottom right")
	}
	return nil
}
// UnmarshalJSON decodes a bounding-box query. The two corners are
// decoded as untyped values and converted with geo.ExtractGeoPoint; an
// error is returned when a corner cannot be converted.
func (q *GeoBoundingBoxQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		TopLeft     interface{} `json:"top_left,omitempty"`
		BottomRight interface{} `json:"bottom_right,omitempty"`
		FieldVal    string      `json:"field,omitempty"`
		BoostVal    *Boost      `json:"boost,omitempty"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// now use our generic point parsing code from the geo package
	tlLon, tlLat, ok := geo.ExtractGeoPoint(raw.TopLeft)
	if !ok {
		return fmt.Errorf("geo location top_left not in a valid format")
	}
	q.TopLeft = []float64{tlLon, tlLat}

	brLon, brLat, ok := geo.ExtractGeoPoint(raw.BottomRight)
	if !ok {
		return fmt.Errorf("geo location bottom_right not in a valid format")
	}
	q.BottomRight = []float64{brLon, brLat}

	q.FieldVal, q.BoostVal = raw.FieldVal, raw.BoostVal
	return nil
}
================================================
FILE: search/query/geo_boundingpolygon.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// GeoBoundingPolygonQuery describes a search restricted to a polygon.
type GeoBoundingPolygonQuery struct {
// Points are the polygon points handed to the bounded-polygon searcher.
Points []geo.Point `json:"polygon_points"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoBoundingPolygonQuery builds a polygon query over the given
// points. The slice is stored as-is, not copied.
func NewGeoBoundingPolygonQuery(points []geo.Point) *GeoBoundingPolygonQuery {
	q := new(GeoBoundingPolygonQuery)
	q.Points = points
	return q
}
// SetBoost stores b as the query's boost.
func (q *GeoBoundingPolygonQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *GeoBoundingPolygonQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *GeoBoundingPolygonQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field this query searches; it may be empty.
func (q *GeoBoundingPolygonQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the bounded-polygon searcher for this query. An empty
// FieldVal is replaced by the mapping's default search field, and the
// context is tagged with the Geo query type.
func (q *GeoBoundingPolygonQuery) Searcher(ctx context.Context, i index.IndexReader,
	m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	geoCtx := context.WithValue(ctx, search.QueryTypeKey, search.Geo)
	return searcher.NewGeoBoundedPolygonSearcher(geoCtx, i, q.Points, target, q.BoostVal.Value(), options)
}
// Validate always returns nil; no checks are performed on the polygon
// points here.
func (q *GeoBoundingPolygonQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes a polygon query. Each entry of "polygon_points"
// is decoded as an untyped value and converted with
// geo.ExtractGeoPoint; the first entry that cannot be converted aborts
// decoding with an error.
func (q *GeoBoundingPolygonQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Points   []interface{} `json:"polygon_points"`
		FieldVal string        `json:"field,omitempty"`
		BoostVal *Boost        `json:"boost,omitempty"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	q.Points = make([]geo.Point, 0, len(raw.Points))
	for idx := range raw.Points {
		entry := raw.Points[idx]
		// now use our generic point parsing code from the geo package
		lon, lat, ok := geo.ExtractGeoPoint(entry)
		if !ok {
			return fmt.Errorf("geo polygon point: %v is not in a valid format", entry)
		}
		q.Points = append(q.Points, geo.Point{Lon: lon, Lat: lat})
	}

	q.FieldVal, q.BoostVal = raw.FieldVal, raw.BoostVal
	return nil
}
================================================
FILE: search/query/geo_distance.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// GeoDistanceQuery describes a search around a point, limited by a
// distance.
type GeoDistanceQuery struct {
// Location is the centre point stored as {lon, lat}.
Location []float64 `json:"location,omitempty"`
// Distance is the distance string; Searcher parses it with
// geo.ParseDistance.
Distance string `json:"distance,omitempty"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoDistanceQuery builds a distance query centred on (lon, lat).
// The distance string is stored unparsed.
func NewGeoDistanceQuery(lon, lat float64, distance string) *GeoDistanceQuery {
	q := new(GeoDistanceQuery)
	q.Location = []float64{lon, lat}
	q.Distance = distance
	return q
}
// SetBoost stores b as the query's boost.
func (q *GeoDistanceQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *GeoDistanceQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *GeoDistanceQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field this query searches; it may be empty.
func (q *GeoDistanceQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the point-distance searcher for this query. An empty
// FieldVal is replaced by the mapping's default search field, and the
// context is tagged with the Geo query type. An error is returned when
// Distance cannot be parsed by geo.ParseDistance.
func (q *GeoDistanceQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}
	ctx = context.WithValue(ctx, search.QueryTypeKey, search.Geo)

	radius, err := geo.ParseDistance(q.Distance)
	if err != nil {
		return nil, err
	}

	lon, lat := q.Location[0], q.Location[1]
	return searcher.NewGeoPointDistanceSearcher(ctx, i, lon, lat, radius, target, q.BoostVal.Value(), options)
}
// Validate always returns nil; neither Location nor Distance is checked
// here. An unparsable Distance is reported by Searcher instead.
func (q *GeoDistanceQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes a distance query. The "location" value is
// decoded as an untyped value and converted with geo.ExtractGeoPoint;
// an error is returned when it cannot be converted.
func (q *GeoDistanceQuery) UnmarshalJSON(data []byte) error {
	var raw struct {
		Location interface{} `json:"location,omitempty"`
		Distance string      `json:"distance,omitempty"`
		FieldVal string      `json:"field,omitempty"`
		BoostVal *Boost      `json:"boost,omitempty"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	// now use our generic point parsing code from the geo package
	lon, lat, ok := geo.ExtractGeoPoint(raw.Location)
	if !ok {
		return fmt.Errorf("geo location not in a valid format")
	}

	q.Location = []float64{lon, lat}
	q.Distance, q.FieldVal, q.BoostVal = raw.Distance, raw.FieldVal, raw.BoostVal
	return nil
}
================================================
FILE: search/query/geo_shape.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// Geometry pairs a GeoJSON shape with the relation string that is passed
// to the geo-shape searcher.
type Geometry struct {
// Shape is the query shape; when decoding it is produced by
// geo.ParseGeoJSONShape.
Shape index.GeoJSON `json:"shape"`
// Relation is forwarded unchanged to the geo-shape searcher.
Relation string `json:"relation"`
}
// GeoShapeQuery describes a search by GeoJSON shape and relation.
type GeoShapeQuery struct {
// Geometry holds the query shape and relation.
Geometry Geometry `json:"geometry"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewGeoShapeQuery builds a geoshape query for the
// given shape type. It can be used for shape types
// such as point, linestring, polygon, multipoint,
// multilinestring, multipolygon and envelope.
// The error from geo.NewGeoJsonShape is returned
// when the shape cannot be built.
func NewGeoShapeQuery(coordinates [][][][]float64, typ,
	relation string) (*GeoShapeQuery, error) {
	shape, _, err := geo.NewGeoJsonShape(coordinates, typ)
	if err != nil {
		return nil, err
	}

	q := new(GeoShapeQuery)
	q.Geometry.Shape = shape
	q.Geometry.Relation = relation
	return q, nil
}
// NewGeoShapeCircleQuery builds a geoshape query for a circle given
// by its center point and radius. Radius formats supported:
// "5in" "5inch" "7yd" "7yards" "9ft" "9feet" "11km" "11kilometers"
// "3nm" "3nauticalmiles" "13mm" "13millimeters" "15cm" "15centimeters"
// "17mi" "17miles" "19m" "19meters" If the unit cannot be determined,
// the entire string is parsed and the unit of meters is assumed.
func NewGeoShapeCircleQuery(coordinates []float64, radius,
	relation string) (*GeoShapeQuery, error) {
	circle, _, err := geo.NewGeoCircleShape(coordinates, radius)
	if err != nil {
		return nil, err
	}

	q := new(GeoShapeQuery)
	q.Geometry.Shape = circle
	q.Geometry.Relation = relation
	return q, nil
}
// NewGeometryCollectionQuery builds a geoshape query for a
// geometrycollection described by the given coordinates and types.
// The error from geo.NewGeometryCollection is returned when the
// collection cannot be built.
func NewGeometryCollectionQuery(coordinates [][][][][]float64, types []string,
	relation string) (*GeoShapeQuery, error) {
	collection, _, err := geo.NewGeometryCollection(coordinates, types)
	if err != nil {
		return nil, err
	}

	q := new(GeoShapeQuery)
	q.Geometry.Shape = collection
	q.Geometry.Relation = relation
	return q, nil
}
// SetBoost stores b as the query's boost.
func (q *GeoShapeQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *GeoShapeQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *GeoShapeQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field this query searches; it may be empty.
func (q *GeoShapeQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the geo-shape searcher for this query's shape and
// relation. An empty FieldVal is replaced by the mapping's default
// search field, and the context is tagged with the Geo query type.
func (q *GeoShapeQuery) Searcher(ctx context.Context, i index.IndexReader,
	m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	geoCtx := context.WithValue(ctx, search.QueryTypeKey, search.Geo)
	g := q.Geometry
	return searcher.NewGeoShapeSearcher(geoCtx, i, g.Shape, g.Relation, target,
		q.BoostVal.Value(), options)
}
// Validate always returns nil; the shape and relation are not checked
// here.
func (q *GeoShapeQuery) Validate() error {
return nil
}
// UnmarshalJSON decodes a Geometry. The "shape" value is kept as raw
// JSON and handed to geo.ParseGeoJSONShape; its result is stored in
// Shape, and a parse error is returned to the caller.
func (q *Geometry) UnmarshalJSON(data []byte) error {
	var raw struct {
		Shape    json.RawMessage `json:"shape"`
		Relation string          `json:"relation"`
	}
	if err := util.UnmarshalJSON(data, &raw); err != nil {
		return err
	}

	var err error
	if q.Shape, err = geo.ParseGeoJSONShape(raw.Shape); err != nil {
		return err
	}
	q.Relation = raw.Relation
	return nil
}
================================================
FILE: search/query/ip_range.go
================================================
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"net"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// IPRangeQuery describes a search for IP addresses inside a network, or
// for one specific IP address.
type IPRangeQuery struct {
// CIDR holds either a network in CIDR notation or a single IP address;
// Validate accepts both forms.
CIDR string `json:"cidr,omitempty"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewIPRangeQuery builds an IP range query for the given string, which
// is stored unparsed in CIDR.
func NewIPRangeQuery(cidr string) *IPRangeQuery {
	q := new(IPRangeQuery)
	q.CIDR = cidr
	return q
}
// SetBoost stores b as the query's boost.
func (q *IPRangeQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *IPRangeQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *IPRangeQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field this query searches; it may be empty.
func (q *IPRangeQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the searcher for this query. An empty FieldVal is
// replaced by the mapping's default search field. When CIDR parses as a
// network, an IP range searcher over that network is returned. When it
// only parses as a single IP address, a term searcher on the 16-byte
// form of that address is returned. Otherwise the CIDR parse error is
// returned.
func (q *IPRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	target := q.FieldVal
	if target == "" {
		target = m.DefaultSearchField()
	}

	_, network, cidrErr := net.ParseCIDR(q.CIDR)
	if cidrErr == nil {
		return searcher.NewIPRangeSearcher(ctx, i, network, target, q.BoostVal.Value(), options)
	}

	addr := net.ParseIP(q.CIDR)
	if addr == nil {
		return nil, cidrErr
	}
	// If we are searching for a specific ip rather than members of a network, just use a term search.
	return searcher.NewTermSearcherBytes(ctx, i, addr.To16(), target, q.BoostVal.Value(), options)
}
// Validate accepts a CIDR value that parses either as a network in
// CIDR notation or as a single IP address, and returns an error for
// anything else.
func (q *IPRangeQuery) Validate() error {
	if _, _, cidrErr := net.ParseCIDR(q.CIDR); cidrErr != nil {
		// We also allow search for a specific IP.
		if net.ParseIP(q.CIDR) == nil {
			return fmt.Errorf("IPRangeQuery must be for a network or ip address, %q", q.CIDR)
		}
	}
	return nil
}
================================================
FILE: search/query/knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package query
import (
"context"
"encoding/json"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// KNNQuery describes a k-nearest-neighbour search on a vector field.
type KNNQuery struct {
// VectorField is the path of the vector field to search.
VectorField string `json:"field"`
// Vector is the query vector. Searcher replaces it with its normalized
// form when cosine similarity is in effect.
Vector []float32 `json:"vector"`
// K must be greater than 0 for Searcher to succeed.
K int64 `json:"k"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
// see KNNRequest.Params for description
Params json.RawMessage `json:"params"`
// elegibleSelector identifies the documents that a pre-filter query
// has made eligible for the KNN search. It is set through
// SetEligibleSelector and passed on to the KNN searcher.
elegibleSelector index.EligibleDocumentSelector
}
// NewKNNQuery builds a KNN query for the given vector. The field and K
// still have to be set (SetField, SetK) before the query is usable.
func NewKNNQuery(vector []float32) *KNNQuery {
	q := new(KNNQuery)
	q.Vector = vector
	return q
}
// Field returns the vector field this query searches.
func (q *KNNQuery) Field() string {
	name := q.VectorField
	return name
}

// SetK sets the value of K.
func (q *KNNQuery) SetK(k int64) {
	q.K = k
}

// SetField sets the vector field this query searches.
func (q *KNNQuery) SetField(field string) {
	q.VectorField = field
}

// SetBoost stores b as the query's boost.
func (q *KNNQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *KNNQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetParams stores the raw search parameters passed to the KNN searcher.
func (q *KNNQuery) SetParams(params json.RawMessage) {
	q.Params = params
}

// SetEligibleSelector stores the selector of eligible documents passed
// to the KNN searcher.
func (q *KNNQuery) SetEligibleSelector(eligibleSelector index.EligibleDocumentSelector) {
	q.elegibleSelector = eligibleSelector
}
// Searcher builds the KNN searcher for this query.
//
// The similarity metric comes from the field mapping of VectorField,
// falling back to index.DefaultVectorSimilarityMetric when the mapping
// leaves it empty. An error is returned when K is not positive or the
// vector is empty. With cosine similarity the stored query vector is
// replaced by its normalized form before searching.
func (q *KNNQuery) Searcher(ctx context.Context, i index.IndexReader,
	m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	fm := m.FieldMappingForPath(q.VectorField)
	metric := fm.Similarity
	if metric == "" {
		metric = index.DefaultVectorSimilarityMetric
	}

	if q.K <= 0 || len(q.Vector) == 0 {
		return nil, fmt.Errorf("k must be greater than 0 and vector must be non-empty")
	}

	// bivf-sq8 indexes only supports hamming distance for the primary
	// binary index. Similarity here is used for the backing flat index,
	// which is set to cosine similarity for recall reasons
	if index.OptimizationRequiresBinaryIndex(fm.VectorIndexOptimizedFor) {
		metric = index.CosineSimilarity
	}

	if metric == index.CosineSimilarity {
		// normalize the vector
		q.Vector = mapping.NormalizeVector(q.Vector)
	}

	return searcher.NewKNNSearcher(ctx, i, m, options, q.VectorField,
		q.Vector, q.K, q.BoostVal.Value(), metric, q.Params,
		q.elegibleSelector)
}
================================================
FILE: search/query/match.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// MatchQuery describes a search for analyzed text: the Match text is run
// through an analyzer and the resulting tokens are searched.
type MatchQuery struct {
// Match is the text to analyze and search for.
Match string `json:"match"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// Analyzer names the analyzer to use. When empty, Searcher uses the
// analyzer the mapping reports for the field.
Analyzer string `json:"analyzer,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
// Prefix is passed to the per-token fuzzy queries as their prefix.
Prefix int `json:"prefix_length"`
// Fuzziness, when non-zero, makes Searcher use fuzzy queries per token.
Fuzziness int `json:"fuzziness"`
// Operator selects whether tokens are combined with "or" or "and".
Operator MatchQueryOperator `json:"operator,omitempty"`
// autoFuzzy enables auto fuzziness on the per-token fuzzy queries. In
// JSON it is represented by the string "auto" under the "fuzziness" key.
autoFuzzy bool
}
// MatchQueryOperator selects how the term searches of a MatchQuery are
// combined. It is encoded in JSON as the string "or" or "and".
type MatchQueryOperator int
const (
// Document must satisfy AT LEAST ONE of term searches.
MatchQueryOperatorOr = MatchQueryOperator(0)
// Document must satisfy ALL of term searches.
MatchQueryOperatorAnd = MatchQueryOperator(1)
)
// MarshalJSON encodes the operator as the JSON string "or" or "and".
// Any other operator value yields an error.
func (o MatchQueryOperator) MarshalJSON() ([]byte, error) {
	if o == MatchQueryOperatorOr {
		return util.MarshalJSON("or")
	}
	if o == MatchQueryOperatorAnd {
		return util.MarshalJSON("and")
	}
	return nil, fmt.Errorf("cannot marshal match operator %d to JSON", o)
}
// UnmarshalJSON decodes the operator from the JSON string "or" or
// "and". Any other string yields an error naming the rejected value and
// leaves the receiver unchanged.
func (o *MatchQueryOperator) UnmarshalJSON(data []byte) error {
	var operatorString string
	if err := util.UnmarshalJSON(data, &operatorString); err != nil {
		return err
	}

	switch operatorString {
	case "or":
		*o = MatchQueryOperatorOr
	case "and":
		*o = MatchQueryOperatorAnd
	default:
		// Report the string that was decoded. Formatting the receiver
		// would print a pointer address instead of the offending value.
		return fmt.Errorf("cannot unmarshal match operator '%v' from JSON", operatorString)
	}
	return nil
}
// NewMatchQuery builds a Query for matching text.
// An Analyzer is chosen based on the field and the
// input text is analyzed with it. The token terms
// produced by the analysis are used to perform term
// searches; with the initial "or" operator, result
// documents must satisfy at least one of them.
func NewMatchQuery(match string) *MatchQuery {
	q := new(MatchQuery)
	q.Match = match
	q.Operator = MatchQueryOperatorOr
	return q
}
// SetBoost stores b as the query's boost.
func (q *MatchQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *MatchQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *MatchQuery) SetField(f string) {
	q.FieldVal = f
}

// Field returns the field this query searches; it may be empty.
func (q *MatchQuery) Field() string {
	name := q.FieldVal
	return name
}

// SetFuzziness sets the numeric fuzziness.
func (q *MatchQuery) SetFuzziness(f int) {
	q.Fuzziness = f
}

// SetAutoFuzziness toggles auto fuzziness.
func (q *MatchQuery) SetAutoFuzziness(auto bool) {
	q.autoFuzzy = auto
}

// SetPrefix sets the prefix value passed to the per-token fuzzy queries.
func (q *MatchQuery) SetPrefix(p int) {
	q.Prefix = p
}

// SetOperator sets how the per-token searches are combined.
func (q *MatchQuery) SetOperator(operator MatchQueryOperator) {
	q.Operator = operator
}
// Searcher builds the searcher for this match query.
//
// An empty FieldVal is replaced by the mapping's default search field.
// The analyzer is the one named by q.Analyzer or, when that is empty,
// the one the mapping reports for the field. An error naming the
// analyzer that was looked up is returned when it is not registered.
//
// The Match text is analyzed and every resulting token becomes either a
// term query or, when Fuzziness is non-zero or auto fuzziness is on, a
// fuzzy query. The per-token queries are combined in a disjunction
// (minimum 1) for the "or" operator or a conjunction for the "and"
// operator; any other operator value yields an error. When analysis
// produces no tokens, a match-none searcher is returned.
func (q *MatchQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	analyzerName := q.Analyzer
	if analyzerName == "" {
		analyzerName = m.AnalyzerNameForPath(field)
	}
	analyzer := m.AnalyzerNamed(analyzerName)
	if analyzer == nil {
		// Report the name that was actually looked up: q.Analyzer is
		// empty whenever the name was taken from the mapping.
		return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
	}

	tokens := analyzer.Analyze([]byte(q.Match))
	if len(tokens) == 0 {
		noneQuery := NewMatchNoneQuery()
		return noneQuery.Searcher(ctx, i, m, options)
	}

	fuzzy := q.Fuzziness != 0 || q.autoFuzzy
	tqs := make([]Query, len(tokens))
	// The index is named idx so it does not shadow the IndexReader i.
	for idx, token := range tokens {
		if !fuzzy {
			tq := NewTermQuery(string(token.Term))
			tq.SetField(field)
			tq.SetBoost(q.BoostVal.Value())
			tqs[idx] = tq
			continue
		}
		fq := NewFuzzyQuery(string(token.Term))
		if q.autoFuzzy {
			fq.SetAutoFuzziness(true)
		} else {
			fq.SetFuzziness(q.Fuzziness)
		}
		fq.SetPrefix(q.Prefix)
		fq.SetField(field)
		fq.SetBoost(q.BoostVal.Value())
		tqs[idx] = fq
	}

	switch q.Operator {
	case MatchQueryOperatorOr:
		shouldQuery := NewDisjunctionQuery(tqs)
		shouldQuery.SetMin(1)
		shouldQuery.SetBoost(q.BoostVal.Value())
		return shouldQuery.Searcher(ctx, i, m, options)
	case MatchQueryOperatorAnd:
		mustQuery := NewConjunctionQuery(tqs)
		mustQuery.SetBoost(q.BoostVal.Value())
		return mustQuery.Searcher(ctx, i, m, options)
	default:
		return nil, fmt.Errorf("unhandled operator %d", q.Operator)
	}
}
// UnmarshalJSON decodes a match query. The "fuzziness" key may hold a
// number, which becomes Fuzziness, or the string "auto", which enables
// auto fuzziness. Any other value for that key is left unapplied.
func (q *MatchQuery) UnmarshalJSON(data []byte) error {
	// Alias has MatchQuery's fields but not its methods, so decoding
	// into it does not recurse into this function.
	type Alias MatchQuery
	aux := struct {
		// Shadows Alias.Fuzziness so either JSON type can be captured.
		Fuzziness interface{} `json:"fuzziness"`
		*Alias
	}{
		Alias: (*Alias)(q),
	}
	if err := util.UnmarshalJSON(data, &aux); err != nil {
		return err
	}
	if num, isNum := aux.Fuzziness.(float64); isNum {
		q.Fuzziness = int(num)
	} else if str, isStr := aux.Fuzziness.(string); isStr && str == "auto" {
		q.autoFuzzy = true
	}
	return nil
}
// MarshalJSON encodes the query. The "fuzziness" key carries the string
// "auto" when auto fuzziness is enabled and the numeric Fuzziness
// otherwise.
func (q *MatchQuery) MarshalJSON() ([]byte, error) {
	var fuzziness interface{} = q.Fuzziness
	if q.autoFuzzy {
		fuzziness = "auto"
	}
	return util.MarshalJSON(struct {
		Match     string             `json:"match"`
		FieldVal  string             `json:"field,omitempty"`
		Analyzer  string             `json:"analyzer,omitempty"`
		BoostVal  *Boost             `json:"boost,omitempty"`
		Prefix    int                `json:"prefix_length"`
		Fuzziness interface{}        `json:"fuzziness"`
		Operator  MatchQueryOperator `json:"operator,omitempty"`
	}{
		Match:     q.Match,
		FieldVal:  q.FieldVal,
		Analyzer:  q.Analyzer,
		BoostVal:  q.BoostVal,
		Prefix:    q.Prefix,
		Fuzziness: fuzziness,
		Operator:  q.Operator,
	})
}
================================================
FILE: search/query/match_all.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// MatchAllQuery describes a search that matches every document.
type MatchAllQuery struct {
// BoostVal is the optional boost passed to the match-all searcher.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMatchAllQuery builds a Query that matches
// all documents in the index.
func NewMatchAllQuery() *MatchAllQuery {
	return new(MatchAllQuery)
}
// SetBoost stores b as the query's boost.
func (q *MatchAllQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *MatchAllQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}
// Searcher returns a match-all searcher over the index reader, scored
// with the query's boost value.
func (q *MatchAllQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	boost := q.BoostVal.Value()
	return searcher.NewMatchAllSearcher(ctx, i, boost, options)
}
// MarshalJSON encodes the query as an object with a "boost" key holding
// BoostVal and a "match_all" key holding an empty object.
func (q *MatchAllQuery) MarshalJSON() ([]byte, error) {
	payload := make(map[string]interface{}, 2)
	payload["match_all"] = map[string]interface{}{}
	payload["boost"] = q.BoostVal
	return json.Marshal(payload)
}
================================================
FILE: search/query/match_none.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// MatchNoneQuery describes a search that matches no document.
type MatchNoneQuery struct {
// BoostVal is the optional boost. Searcher does not use it.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewMatchNoneQuery builds a Query that does not
// match any document in the index.
func NewMatchNoneQuery() *MatchNoneQuery {
	return new(MatchNoneQuery)
}
// SetBoost stores b as the query's boost.
func (q *MatchNoneQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *MatchNoneQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}
// Searcher returns a match-none searcher for the index reader. The
// context, mapping, options and the query's boost are not used.
func (q *MatchNoneQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
return searcher.NewMatchNoneSearcher(i)
}
// MarshalJSON encodes the query as an object with a "boost" key holding
// BoostVal and a "match_none" key holding an empty object.
func (q *MatchNoneQuery) MarshalJSON() ([]byte, error) {
	payload := make(map[string]interface{}, 2)
	payload["match_none"] = map[string]interface{}{}
	payload["boost"] = q.BoostVal
	return json.Marshal(payload)
}
================================================
FILE: search/query/match_phrase.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// MatchPhraseQuery describes a search for an analyzed phrase: the
// MatchPhrase text is run through an analyzer and the resulting tokens
// are searched as a phrase.
type MatchPhraseQuery struct {
// MatchPhrase is the text to analyze and search for as a phrase.
MatchPhrase string `json:"match_phrase"`
// FieldVal is the field to search. When empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// Analyzer names the analyzer to use. When empty, Searcher uses the
// analyzer the mapping reports for the field.
Analyzer string `json:"analyzer,omitempty"`
// BoostVal is the optional boost.
BoostVal *Boost `json:"boost,omitempty"`
// Fuzziness is passed to the phrase query unless autoFuzzy is set.
Fuzziness int `json:"fuzziness"`
// autoFuzzy enables auto fuzziness on the phrase query. When decoding,
// the string "auto" under the "fuzziness" key sets it.
autoFuzzy bool
}
// NewMatchPhraseQuery builds a Query object for
// matching phrases in the index.
// An Analyzer is chosen based on the field and the
// input text is analyzed with it. The token terms
// produced by the analysis form the search phrase,
// which result documents must match. The queried
// field must have been indexed with
// IncludeTermVectors set to true.
func NewMatchPhraseQuery(matchPhrase string) *MatchPhraseQuery {
	q := new(MatchPhraseQuery)
	q.MatchPhrase = matchPhrase
	return q
}
// SetBoost stores b as the query's boost.
func (q *MatchPhraseQuery) SetBoost(b float64) {
	v := Boost(b)
	q.BoostVal = &v
}

// Boost reports the boost obtained from BoostVal.Value().
func (q *MatchPhraseQuery) Boost() float64 {
	current := q.BoostVal
	return current.Value()
}

// SetField sets the field this query searches.
func (q *MatchPhraseQuery) SetField(f string) {
	q.FieldVal = f
}

// SetFuzziness sets the numeric fuzziness.
func (q *MatchPhraseQuery) SetFuzziness(f int) {
	q.Fuzziness = f
}

// SetAutoFuzziness toggles auto fuzziness.
func (q *MatchPhraseQuery) SetAutoFuzziness(auto bool) {
	q.autoFuzzy = auto
}

// Field returns the field this query searches; it may be empty.
func (q *MatchPhraseQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher analyzes MatchPhrase and returns a searcher for the resulting
// phrase.
//
// The field defaults to the mapping's default search field when FieldVal is
// empty. The analyzer is q.Analyzer when set, otherwise the analyzer the
// mapping associates with the resolved field. An error is returned when
// that analyzer is not registered; the message reports the name that was
// actually looked up, including the case where it came from the mapping.
//
// If analysis yields no tokens the query behaves like a match-none query.
// Otherwise the tokens are turned into a multi-phrase query carrying this
// query's boost and fuzziness settings.
func (q *MatchPhraseQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}

	analyzerName := q.Analyzer
	if analyzerName == "" {
		analyzerName = m.AnalyzerNameForPath(field)
	}
	analyzer := m.AnalyzerNamed(analyzerName)
	if analyzer == nil {
		// Report the resolved name, not q.Analyzer, which is empty when
		// the analyzer was chosen by the mapping.
		return nil, fmt.Errorf("no analyzer named '%s' registered", analyzerName)
	}

	tokens := analyzer.Analyze([]byte(q.MatchPhrase))
	if len(tokens) == 0 {
		// Nothing left to search for after analysis.
		noneQuery := NewMatchNoneQuery()
		return noneQuery.Searcher(ctx, i, m, options)
	}

	phraseQuery := NewMultiPhraseQuery(tokenStreamToPhrase(tokens), field)
	phraseQuery.SetBoost(q.BoostVal.Value())
	if q.autoFuzzy {
		phraseQuery.SetAutoFuzziness(true)
	} else {
		phraseQuery.SetFuzziness(q.Fuzziness)
	}
	return phraseQuery.Searcher(ctx, i, m, options)
}
// tokenStreamToPhrase groups token terms by position into a phrase.
// The result has one slot per position between the smallest and the
// largest token position (inclusive); slots for positions without a token
// stay nil, and tokens sharing a position are collected in the same slot
// in stream order. An empty stream yields nil.
func tokenStreamToPhrase(tokens analysis.TokenStream) [][]string {
	// Start the minimum at the largest int so any token lowers it.
	minPos, maxPos := int(^uint(0)>>1), 0
	for _, tok := range tokens {
		p := tok.Position
		if p < minPos {
			minPos = p
		}
		if p > maxPos {
			maxPos = p
		}
	}

	span := maxPos - minPos + 1
	if span <= 0 {
		return nil
	}

	phrase := make([][]string, span)
	for _, tok := range tokens {
		slot := tok.Position - minPos
		phrase[slot] = append(phrase[slot], string(tok.Term))
	}
	return phrase
}
// UnmarshalJSON decodes a match phrase query from JSON.
//
// All exported fields are decoded directly into q, except "fuzziness",
// which may be either a number or the string "auto": a number is stored in
// Fuzziness (truncated to int), "auto" enables automatic fuzziness, and any
// other value is ignored.
//
// The helper struct is decoded through a single pointer. Decoding through a
// pointer to a pointer lets a JSON null reset the inner pointer to nil
// (assuming util.UnmarshalJSON follows encoding/json semantics), which
// would make the subsequent field access panic.
func (q *MatchPhraseQuery) UnmarshalJSON(data []byte) error {
	// Alias has the same fields as MatchPhraseQuery but none of its
	// methods, so decoding into it does not recurse into UnmarshalJSON.
	type Alias MatchPhraseQuery
	aux := struct {
		// Fuzziness shadows Alias.Fuzziness so both numbers and strings
		// can be accepted.
		Fuzziness interface{} `json:"fuzziness"`
		*Alias
	}{
		Alias: (*Alias)(q),
	}
	if err := util.UnmarshalJSON(data, &aux); err != nil {
		return err
	}
	switch v := aux.Fuzziness.(type) {
	case float64:
		q.Fuzziness = int(v)
	case string:
		if v == "auto" {
			q.autoFuzzy = true
		}
	}
	return nil
}
// MarshalJSON encodes the query as JSON. The "fuzziness" key carries the
// string "auto" when automatic fuzziness is enabled and the numeric
// Fuzziness otherwise.
func (q *MatchPhraseQuery) MarshalJSON() ([]byte, error) {
	out := struct {
		MatchPhrase string      `json:"match_phrase"`
		FieldVal    string      `json:"field,omitempty"`
		Analyzer    string      `json:"analyzer,omitempty"`
		BoostVal    *Boost      `json:"boost,omitempty"`
		Fuzziness   interface{} `json:"fuzziness"`
	}{
		MatchPhrase: q.MatchPhrase,
		FieldVal:    q.FieldVal,
		Analyzer:    q.Analyzer,
		BoostVal:    q.BoostVal,
		Fuzziness:   q.Fuzziness,
	}
	if q.autoFuzzy {
		out.Fuzziness = "auto"
	}
	return util.MarshalJSON(out)
}
================================================
FILE: search/query/match_phrase_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/analysis"
)
// TestTokenStreamToPhrase checks that tokenStreamToPhrase groups terms by
// position, leaves nil slots for position gaps and returns nil for an
// empty stream.
func TestTokenStreamToPhrase(t *testing.T) {
	// tok builds a token carrying only the fields the function reads.
	tok := func(term string, pos int) *analysis.Token {
		return &analysis.Token{
			Term:     []byte(term),
			Position: pos,
		}
	}

	cases := []struct {
		input analysis.TokenStream
		want  [][]string
	}{
		// empty token stream returns nil
		{
			input: analysis.TokenStream{},
			want:  nil,
		},
		// typical token
		{
			input: analysis.TokenStream{tok("one", 1), tok("two", 2)},
			want:  [][]string{{"one"}, {"two"}},
		},
		// token stream containing a gap (usually from stop words)
		{
			input: analysis.TokenStream{tok("wag", 1), tok("dog", 3)},
			want:  [][]string{{"wag"}, nil, {"dog"}},
		},
		// token stream containing multiple tokens at the same position
		{
			input: analysis.TokenStream{
				tok("nia", 1),
				tok("onia", 1),
				tok("donia", 1),
				tok("imo", 2),
				tok("nimo", 2),
				tok("ónimo", 2),
			},
			want: [][]string{{"nia", "onia", "donia"}, {"imo", "nimo", "ónimo"}},
		},
	}

	for idx, tc := range cases {
		got := tokenStreamToPhrase(tc.input)
		if reflect.DeepEqual(got, tc.want) {
			continue
		}
		t.Fatalf("expected %#v got %#v for test %d", tc.want, got, idx)
	}
}
================================================
FILE: search/query/multi_phrase.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// MultiPhraseQuery is a phrase query in which every phrase position is
// described by a list of terms rather than a single term.
type MultiPhraseQuery struct {
// Terms holds, for each phrase position, the list of terms for that
// position.
Terms [][]string `json:"terms"`
// FieldVal is the field passed to the searcher.
FieldVal string `json:"field,omitempty"`
// BoostVal holds the optional boost set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
// Fuzziness is the numeric fuzziness passed to the searcher.
Fuzziness int `json:"fuzziness"`
// autoFuzzy is passed to the searcher alongside Fuzziness;
// MarshalJSON/UnmarshalJSON represent it as "fuzziness": "auto".
autoFuzzy bool
}
// NewMultiPhraseQuery returns a Query that finds term phrases in the
// index. It works like PhraseQuery, except that each phrase position may
// be satisfied by any one of a list of terms instead of exactly one term.
// At least one term of every position must occur in the correct order, at
// the correct index offsets, in the specified field. The queried field
// must have been indexed with IncludeTermVectors set to true.
func NewMultiPhraseQuery(terms [][]string, field string) *MultiPhraseQuery {
	q := new(MultiPhraseQuery)
	q.Terms = terms
	q.FieldVal = field
	return q
}
// SetFuzziness sets the numeric fuzziness passed to the searcher.
func (q *MultiPhraseQuery) SetFuzziness(f int) {
q.Fuzziness = f
}
// SetAutoFuzziness sets the automatic-fuzziness flag passed to the
// searcher.
func (q *MultiPhraseQuery) SetAutoFuzziness(auto bool) {
q.autoFuzzy = auto
}
// SetBoost stores b as the query's boost in a freshly allocated Boost.
func (q *MultiPhraseQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost returns the boost for this query as reported by BoostVal.Value.
func (q *MultiPhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
// Field returns the field this query is restricted to.
func (q *MultiPhraseQuery) Field() string {
return q.FieldVal
}
// SetField sets the field this query is restricted to.
func (q *MultiPhraseQuery) SetField(f string) {
q.FieldVal = f
}
// Searcher builds a multi-phrase searcher from the query's terms,
// fuzziness settings, field and boost. The index mapping is not consulted.
func (q *MultiPhraseQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	boost := q.BoostVal.Value()
	return searcher.NewMultiPhraseSearcher(ctx, i, q.Terms, q.Fuzziness, q.autoFuzzy, q.FieldVal, boost, options)
}
// Validate reports an error when the query has no phrase positions.
func (q *MultiPhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}
// UnmarshalJSON decodes a multi-phrase query from JSON.
//
// All exported fields are decoded directly into q, except "fuzziness",
// which may be either a number or the string "auto": a number is stored in
// Fuzziness (truncated to int), "auto" enables automatic fuzziness, and any
// other value is ignored.
//
// The helper struct is decoded through a single pointer. Decoding through a
// pointer to a pointer lets a JSON null reset the inner pointer to nil
// (assuming util.UnmarshalJSON follows encoding/json semantics), which
// would make the subsequent field access panic.
func (q *MultiPhraseQuery) UnmarshalJSON(data []byte) error {
	// Alias has the same fields as MultiPhraseQuery but none of its
	// methods, so decoding into it does not recurse into UnmarshalJSON.
	type Alias MultiPhraseQuery
	aux := struct {
		// Fuzziness shadows Alias.Fuzziness so both numbers and strings
		// can be accepted.
		Fuzziness interface{} `json:"fuzziness"`
		*Alias
	}{
		Alias: (*Alias)(q),
	}
	if err := util.UnmarshalJSON(data, &aux); err != nil {
		return err
	}
	switch v := aux.Fuzziness.(type) {
	case float64:
		q.Fuzziness = int(v)
	case string:
		if v == "auto" {
			q.autoFuzzy = true
		}
	}
	return nil
}
// MarshalJSON encodes the query as JSON. The "fuzziness" key carries the
// string "auto" when automatic fuzziness is enabled and the numeric
// Fuzziness otherwise.
func (q *MultiPhraseQuery) MarshalJSON() ([]byte, error) {
	out := struct {
		Terms     [][]string  `json:"terms"`
		FieldVal  string      `json:"field,omitempty"`
		BoostVal  *Boost      `json:"boost,omitempty"`
		Fuzziness interface{} `json:"fuzziness"`
	}{
		Terms:     q.Terms,
		FieldVal:  q.FieldVal,
		BoostVal:  q.BoostVal,
		Fuzziness: q.Fuzziness,
	}
	if q.autoFuzzy {
		out.Fuzziness = "auto"
	}
	return util.MarshalJSON(out)
}
================================================
FILE: search/query/numeric_range.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// NumericRangeQuery describes a search over a range of numeric values.
type NumericRangeQuery struct {
// Min is the optional lower endpoint; Validate rejects a query in which
// both Min and Max are nil.
Min *float64 `json:"min,omitempty"`
// Max is the optional upper endpoint.
Max *float64 `json:"max,omitempty"`
// InclusiveMin optionally states whether Min itself is part of the
// range; nil leaves the choice to the searcher.
InclusiveMin *bool `json:"inclusive_min,omitempty"`
// InclusiveMax optionally states whether Max itself is part of the
// range; nil leaves the choice to the searcher.
InclusiveMax *bool `json:"inclusive_max,omitempty"`
// FieldVal is the field to search; when empty, Searcher uses the
// mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal holds the optional boost set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewNumericRangeQuery creates a new Query for ranges
// of numeric values.
// Either endpoint may be nil, but not both.
// The minimum value is inclusive.
// The maximum value is exclusive.
// Both inclusivity flags are left nil here, so the inclusive/exclusive
// behaviour above is whatever the searcher applies for unset flags.
func NewNumericRangeQuery(min, max *float64) *NumericRangeQuery {
return NewNumericRangeInclusiveQuery(min, max, nil, nil)
}
// NewNumericRangeInclusiveQuery returns a Query for ranges of numeric
// values. Either endpoint may be nil, but not both. Endpoint inclusion is
// controlled with minInclusive and maxInclusive.
func NewNumericRangeInclusiveQuery(min, max *float64, minInclusive, maxInclusive *bool) *NumericRangeQuery {
	q := new(NumericRangeQuery)
	q.Min, q.Max = min, max
	q.InclusiveMin, q.InclusiveMax = minInclusive, maxInclusive
	return q
}
// SetBoost stores b as the query's boost in a freshly allocated Boost.
func (q *NumericRangeQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost returns the boost for this query as reported by BoostVal.Value.
func (q *NumericRangeQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field this query is restricted to.
func (q *NumericRangeQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to; it may be empty.
func (q *NumericRangeQuery) Field() string {
return q.FieldVal
}
// Searcher builds a numeric range searcher for the query. The field
// defaults to the mapping's default search field when FieldVal is empty,
// and the context handed to the searcher is tagged with the numeric query
// type under search.QueryTypeKey.
func (q *NumericRangeQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	numericCtx := context.WithValue(ctx, search.QueryTypeKey, search.Numeric)
	return searcher.NewNumericRangeSearcher(numericCtx, i, q.Min, q.Max, q.InclusiveMin, q.InclusiveMax, field, q.BoostVal.Value(), options)
}
// Validate reports an error when neither Min nor Max is set.
func (q *NumericRangeQuery) Validate() error {
	if q.Min == nil && q.Max == nil {
		return fmt.Errorf("numeric range query must specify min or max")
	}
	return nil
}
================================================
FILE: search/query/phrase.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// PhraseQuery describes a search for a sequence of terms in a field.
type PhraseQuery struct {
// Terms holds the phrase terms, one per position.
Terms []string `json:"terms"`
// FieldVal is the field passed to the searcher.
FieldVal string `json:"field,omitempty"`
// BoostVal holds the optional boost set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
// Fuzziness is the numeric fuzziness passed to the searcher.
Fuzziness int `json:"fuzziness"`
// autoFuzzy is passed to the searcher alongside Fuzziness;
// MarshalJSON/UnmarshalJSON represent it as "fuzziness": "auto".
autoFuzzy bool
}
// NewPhraseQuery returns a Query that finds exact term phrases in the
// index. The provided terms must occur in the correct order, at the
// correct index offsets, in the specified field. The queried field must
// have been indexed with IncludeTermVectors set to true.
func NewPhraseQuery(terms []string, field string) *PhraseQuery {
	q := new(PhraseQuery)
	q.Terms = terms
	q.FieldVal = field
	return q
}
// SetBoost stores b as the query's boost in a freshly allocated Boost.
func (q *PhraseQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// SetFuzziness sets the numeric fuzziness passed to the searcher.
func (q *PhraseQuery) SetFuzziness(f int) {
q.Fuzziness = f
}
// SetAutoFuzziness sets the automatic-fuzziness flag passed to the
// searcher.
func (q *PhraseQuery) SetAutoFuzziness(auto bool) {
q.autoFuzzy = auto
}
// Boost returns the boost for this query as reported by BoostVal.Value.
func (q *PhraseQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field this query is restricted to.
func (q *PhraseQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to.
func (q *PhraseQuery) Field() string {
return q.FieldVal
}
// Searcher builds a phrase searcher from the query's terms, fuzziness
// settings, field and boost. The index mapping is not consulted.
func (q *PhraseQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	boost := q.BoostVal.Value()
	return searcher.NewPhraseSearcher(ctx, i, q.Terms, q.Fuzziness, q.autoFuzzy, q.FieldVal, boost, options)
}
// Validate reports an error when the query has no terms.
func (q *PhraseQuery) Validate() error {
	if len(q.Terms) == 0 {
		return fmt.Errorf("phrase query must contain at least one term")
	}
	return nil
}
// UnmarshalJSON decodes a phrase query from JSON.
//
// All exported fields are decoded directly into q, except "fuzziness",
// which may be either a number or the string "auto": a number is stored in
// Fuzziness (truncated to int), "auto" enables automatic fuzziness, and any
// other value is ignored.
//
// The helper struct is decoded through a single pointer. Decoding through a
// pointer to a pointer lets a JSON null reset the inner pointer to nil
// (assuming util.UnmarshalJSON follows encoding/json semantics), which
// would make the subsequent field access panic.
func (q *PhraseQuery) UnmarshalJSON(data []byte) error {
	// Alias has the same fields as PhraseQuery but none of its methods,
	// so decoding into it does not recurse into UnmarshalJSON.
	type Alias PhraseQuery
	aux := struct {
		// Fuzziness shadows Alias.Fuzziness so both numbers and strings
		// can be accepted.
		Fuzziness interface{} `json:"fuzziness"`
		*Alias
	}{
		Alias: (*Alias)(q),
	}
	if err := util.UnmarshalJSON(data, &aux); err != nil {
		return err
	}
	switch v := aux.Fuzziness.(type) {
	case float64:
		q.Fuzziness = int(v)
	case string:
		if v == "auto" {
			q.autoFuzzy = true
		}
	}
	return nil
}
// MarshalJSON encodes the query as JSON. The "fuzziness" key carries the
// string "auto" when automatic fuzziness is enabled and the numeric
// Fuzziness otherwise.
func (q *PhraseQuery) MarshalJSON() ([]byte, error) {
	out := struct {
		Terms     []string    `json:"terms"`
		FieldVal  string      `json:"field,omitempty"`
		BoostVal  *Boost      `json:"boost,omitempty"`
		Fuzziness interface{} `json:"fuzziness"`
	}{
		Terms:     q.Terms,
		FieldVal:  q.FieldVal,
		BoostVal:  q.BoostVal,
		Fuzziness: q.Fuzziness,
	}
	if q.autoFuzzy {
		out.Fuzziness = "auto"
	}
	return util.MarshalJSON(out)
}
================================================
FILE: search/query/prefix.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// PrefixQuery describes a search for terms starting with a given prefix.
type PrefixQuery struct {
// Prefix is the term prefix handed to the prefix searcher.
Prefix string `json:"prefix"`
// FieldVal is the field to search; when empty, Searcher uses the
// mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal holds the optional boost set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewPrefixQuery returns a Query that finds documents containing terms
// which start with the specified prefix.
func NewPrefixQuery(prefix string) *PrefixQuery {
	q := new(PrefixQuery)
	q.Prefix = prefix
	return q
}
// SetBoost stores b as the query's boost in a freshly allocated Boost.
func (q *PrefixQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost returns the boost for this query as reported by BoostVal.Value.
func (q *PrefixQuery) Boost() float64 {
return q.BoostVal.Value()
}
// SetField sets the field this query is restricted to.
func (q *PrefixQuery) SetField(f string) {
q.FieldVal = f
}
// Field returns the field this query is restricted to; it may be empty.
func (q *PrefixQuery) Field() string {
return q.FieldVal
}
// Searcher builds a term-prefix searcher for the query. The field
// defaults to the mapping's default search field when FieldVal is empty.
func (q *PrefixQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	field := q.FieldVal
	if field == "" {
		field = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewTermPrefixSearcher(ctx, i, q.Prefix, field, boost, options)
}
================================================
FILE: search/query/query.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"encoding/json"
"fmt"
"io"
"log"
"strings"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
"github.com/blevesearch/bleve/v2/util"
index "github.com/blevesearch/bleve_index_api"
)
// logger is the package-level logger; it discards all output until it is
// replaced through SetLog.
var logger = log.New(io.Discard, "bleve mapping ", log.LstdFlags)
// SetLog replaces the package-level logger with l.
// By default log messages are sent to io.Discard.
func SetLog(l *log.Logger) {
logger = l
}
// A Query represents a description of the type
// and parameters for a query into the index.
type Query interface {
// Searcher builds the search.Searcher that executes this query against
// the index reader i, using the index mapping m and the given options.
Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping,
options search.SearcherOptions) (search.Searcher, error)
}
// A BoostableQuery represents a Query which can be boosted
// relative to other queries.
type BoostableQuery interface {
Query
// SetBoost sets the boost of the query to b.
SetBoost(b float64)
// Boost returns the boost of the query.
Boost() float64
}
// A FieldableQuery represents a Query which can be restricted
// to a single field.
type FieldableQuery interface {
Query
// SetField restricts the query to field f.
SetField(f string)
// Field returns the field the query is restricted to.
Field() string
}
// A ValidatableQuery represents a Query which can be validated
// prior to execution.
type ValidatableQuery interface {
Query
// Validate returns a non-nil error when the query is not well formed.
Validate() error
}
// ParsePreSearchData deserializes a JSON representation of a
// PreSearchData object.
//
// Only the KNN, synonym and BM25 pre-search keys are decoded; any other
// key is ignored. The returned map is nil when none of the known keys is
// present.
func ParsePreSearchData(input []byte) (map[string]interface{}, error) {
	var raw map[string]json.RawMessage
	if err := util.UnmarshalJSON(input, &raw); err != nil {
		return nil, err
	}

	var rv map[string]interface{}
	// store allocates the result map on first use and records one entry.
	store := func(key string, value interface{}) {
		if rv == nil {
			rv = make(map[string]interface{})
		}
		rv[key] = value
	}

	for key, payload := range raw {
		switch key {
		case search.KnnPreSearchDataKey:
			var matches []*search.DocumentMatch
			if payload != nil {
				if err := util.UnmarshalJSON(payload, &matches); err != nil {
					return nil, err
				}
			}
			store(search.KnnPreSearchDataKey, matches)
		case search.SynonymPreSearchDataKey:
			var synonyms search.FieldTermSynonymMap
			if payload != nil {
				if err := util.UnmarshalJSON(payload, &synonyms); err != nil {
					return nil, err
				}
			}
			store(search.SynonymPreSearchDataKey, synonyms)
		case search.BM25PreSearchDataKey:
			var stats *search.BM25Stats
			if payload != nil {
				if err := util.UnmarshalJSON(payload, &stats); err != nil {
					return nil, err
				}
			}
			store(search.BM25PreSearchDataKey, stats)
		}
	}
	return rv, nil
}
// ParseQuery deserializes a JSON representation of a Query object.
//
// The concrete query type is inferred from the keys present in the JSON
// object. The checks run in a fixed order and the first one that matches
// decides the type, so the order of the cases below is significant.
// Empty input and an empty object are both interpreted as a match_none
// query. An error is returned when no known key is present.
func ParseQuery(input []byte) (Query, error) {
	if len(input) == 0 {
		// interpret as a match_none query
		return NewMatchNoneQuery(), nil
	}

	var fields map[string]interface{}
	if err := util.UnmarshalJSON(input, &fields); err != nil {
		return nil, err
	}
	if len(fields) == 0 {
		// interpret as a match_none query
		return NewMatchNoneQuery(), nil
	}

	// has reports whether the top-level object contains key.
	has := func(key string) bool {
		_, ok := fields[key]
		return ok
	}
	// isNumber / isString report whether key holds a JSON number / string.
	isNumber := func(key string) bool {
		_, ok := fields[key].(float64)
		return ok
	}
	isString := func(key string) bool {
		_, ok := fields[key].(string)
		return ok
	}
	// decode unmarshals the whole input into target and returns it.
	decode := func(target Query) (Query, error) {
		if err := util.UnmarshalJSON(input, target); err != nil {
			return nil, err
		}
		return target, nil
	}

	switch {
	case has("fuzziness") && !has("match") && !has("match_phrase") && !has("terms"):
		return decode(&FuzzyQuery{})
	case has("match"):
		return decode(&MatchQuery{})
	case has("match_phrase"):
		return decode(&MatchPhraseQuery{})
	case has("terms"):
		phrase, err := decode(&PhraseQuery{})
		if err != nil {
			// now try multi-phrase
			return decode(&MultiPhraseQuery{})
		}
		return phrase, nil
	case has("term"):
		return decode(&TermQuery{})
	case has("must") || has("should") || has("must_not") || has("filter"):
		return decode(&BooleanQuery{})
	case has("conjuncts"):
		return decode(&ConjunctionQuery{})
	case has("disjuncts"):
		return decode(&DisjunctionQuery{})
	case has("query"):
		return decode(&QueryStringQuery{})
	case isNumber("min") || isNumber("max"):
		return decode(&NumericRangeQuery{})
	case isString("min") || isString("max"):
		return decode(&TermRangeQuery{})
	case has("start") || has("end"):
		return decode(&DateRangeStringQuery{})
	case has("prefix"):
		return decode(&PrefixQuery{})
	case has("regexp"):
		return decode(&RegexpQuery{})
	case has("wildcard"):
		return decode(&WildcardQuery{})
	case has("match_all"):
		return decode(&MatchAllQuery{})
	case has("match_none"):
		return decode(&MatchNoneQuery{})
	case has("ids"):
		return decode(&DocIDQuery{})
	case has("bool"):
		return decode(&BoolFieldQuery{})
	case has("top_left") && has("bottom_right"):
		return decode(&GeoBoundingBoxQuery{})
	case has("distance"):
		return decode(&GeoDistanceQuery{})
	case has("polygon_points"):
		return decode(&GeoBoundingPolygonQuery{})
	case has("geometry"):
		return decode(&GeoShapeQuery{})
	case has("cidr"):
		return decode(&IPRangeQuery{})
	}
	return nil, fmt.Errorf("unknown query type")
}
// expandQuery walks the input query tree and returns a tree in which every
// query string query has been replaced by the base queries it parses to.
// Conjunction, disjunction and boolean queries are updated in place, so
// the returned tree may reference queries from the input tree as well as
// newly created ones.
func expandQuery(m mapping.IndexMapping, query Query) (Query, error) {
	var expand func(node Query) (Query, error)

	// expandAll expands every child and always returns a non-nil slice
	// on success.
	expandAll := func(children []Query) ([]Query, error) {
		out := make([]Query, 0, len(children))
		for _, child := range children {
			expandedChild, err := expand(child)
			if err != nil {
				return nil, err
			}
			out = append(out, expandedChild)
		}
		return out, nil
	}

	expand = func(node Query) (Query, error) {
		switch q := node.(type) {
		case *QueryStringQuery:
			parsed, err := parseQuerySyntax(q.Query)
			if err != nil {
				return nil, fmt.Errorf("could not parse '%s': %s", q.Query, err)
			}
			return expand(parsed)
		case *ConjunctionQuery:
			children, err := expandAll(q.Conjuncts)
			if err != nil {
				return nil, err
			}
			q.Conjuncts = children
			return q, nil
		case *DisjunctionQuery:
			children, err := expandAll(q.Disjuncts)
			if err != nil {
				return nil, err
			}
			q.Disjuncts = children
			return q, nil
		case *BooleanQuery:
			// Each clause is overwritten with the expansion result
			// before the error is checked.
			for _, clause := range []*Query{&q.Must, &q.Should, &q.MustNot, &q.Filter} {
				var err error
				*clause, err = expand(*clause)
				if err != nil {
					return nil, err
				}
			}
			return q, nil
		}
		return node, nil
	}

	return expand(query)
}
// DumpQuery returns a string representation of the query tree in which
// query string queries have been expanded into base queries. The output
// is indented JSON meant for debugging; its format may change in the
// future.
func DumpQuery(m mapping.IndexMapping, query Query) (string, error) {
	expanded, err := expandQuery(m, query)
	if err != nil {
		return "", err
	}
	encoded, err := json.MarshalIndent(expanded, "", " ")
	return string(encoded), err
}
// ExtractFields returns the set of fields referenced by the query, added
// to fs. The returned set may be nil if the query does not explicitly
// reference any field and the DefaultSearchField is unset in the index
// mapping. A nil query or mapping returns fs unchanged.
func ExtractFields(q Query, m mapping.IndexMapping, fs search.FieldSet) (search.FieldSet, error) {
	if q == nil || m == nil {
		return fs, nil
	}

	// collect folds ExtractFields over the children, stopping at the
	// first error.
	collect := func(children []Query) (search.FieldSet, error) {
		var err error
		for _, child := range children {
			fs, err = ExtractFields(child, m, fs)
			if err != nil {
				return fs, err
			}
		}
		return fs, nil
	}

	switch typed := q.(type) {
	case FieldableQuery:
		name := typed.Field()
		if name == "" {
			name = m.DefaultSearchField()
		}
		if name == "" {
			return fs, nil
		}
		if fs == nil {
			fs = search.NewFieldSet()
		}
		fs.AddField(name)
	case *QueryStringQuery:
		expanded, err := expandQuery(m, typed)
		if err != nil {
			return fs, err
		}
		return ExtractFields(expanded, m, fs)
	case *BooleanQuery:
		return collect([]Query{typed.Must, typed.Should, typed.MustNot, typed.Filter})
	case *ConjunctionQuery:
		return collect(typed.Conjuncts)
	case *DisjunctionQuery:
		return collect(typed.Disjuncts)
	case *DocIDQuery, *MatchAllQuery:
		// These query types are recorded against the "_id" field.
		if fs == nil {
			fs = search.NewFieldSet()
		}
		fs.AddField("_id")
	}
	return fs, nil
}
// Match types accepted by addSynonymsForTermWithMatchType; they select how
// thesaurus keys are matched against a term.
const (
// FuzzyMatchType selects fuzzy key lookup (or an exact lookup when the
// fuzziness is zero).
FuzzyMatchType = iota
// RegexpMatchType selects regular-expression key lookup.
RegexpMatchType
// PrefixMatchType selects prefix key lookup.
PrefixMatchType
)
// ExtractSynonyms walks the query tree and collects synonyms into rv, a
// map of field-term pairs to their synonyms. For every term-bearing query
// whose field has a synonym source in the mapping, the matching thesaurus
// entries are looked up through r and added to the map. The map is
// returned and may be nil if no synonyms were found. A nil reader, mapping
// or query returns rv unchanged.
func ExtractSynonyms(ctx context.Context, m mapping.SynonymMapping, r index.ThesaurusReader,
	query Query, rv search.FieldTermSynonymMap,
) (search.FieldTermSynonymMap, error) {
	if query == nil || m == nil || r == nil {
		return rv, nil
	}

	var err error

	// locate resolves the effective field (falling back to the default
	// search field) and the synonym source configured for it.
	locate := func(field string) (string, string) {
		if field == "" {
			field = m.DefaultSearchField()
		}
		return field, m.SynonymSourceForPath(field)
	}

	// lookupAnalyzer returns the named analyzer, or the field's analyzer
	// when no name is given.
	lookupAnalyzer := func(name, field string) (analysis.Analyzer, error) {
		if name == "" {
			name = m.AnalyzerNameForPath(field)
		}
		if found := m.AnalyzerNamed(name); found != nil {
			return found, nil
		}
		return nil, fmt.Errorf("no analyzer named '%s' registered", name)
	}

	// addFuzzy adds synonyms for each term using fuzzy key matching; with
	// auto enabled the fuzziness is derived from each term.
	addFuzzy := func(source, field string, terms []string, fuzziness, prefix int, auto bool) error {
		for _, term := range terms {
			if auto {
				fuzziness = searcher.GetAutoFuzziness(term)
			}
			rv, err = addSynonymsForTermWithMatchType(ctx, FuzzyMatchType, source, field, term, fuzziness, prefix, r, rv)
			if err != nil {
				return err
			}
		}
		return nil
	}

	// addAnalyzed analyzes text for the resolved field and adds synonyms
	// for every resulting token.
	addAnalyzed := func(analyzerName, fieldVal, text string, fuzziness, prefix int, auto bool) error {
		field, source := locate(fieldVal)
		if source == "" {
			return nil
		}
		analyzer, aerr := lookupAnalyzer(analyzerName, field)
		if aerr != nil {
			return aerr
		}
		tokens := analyzer.Analyze([]byte(text))
		terms := make([]string, 0, len(tokens))
		for _, tok := range tokens {
			terms = append(terms, string(tok.Term))
		}
		return addFuzzy(source, field, terms, fuzziness, prefix, auto)
	}

	switch q := query.(type) {
	case *BooleanQuery:
		for _, clause := range []Query{q.Must, q.Should, q.MustNot, q.Filter} {
			rv, err = ExtractSynonyms(ctx, m, r, clause, rv)
			if err != nil {
				return nil, err
			}
		}
	case *ConjunctionQuery:
		for _, child := range q.Conjuncts {
			rv, err = ExtractSynonyms(ctx, m, r, child, rv)
			if err != nil {
				return nil, err
			}
		}
	case *DisjunctionQuery:
		for _, child := range q.Disjuncts {
			rv, err = ExtractSynonyms(ctx, m, r, child, rv)
			if err != nil {
				return nil, err
			}
		}
	case *FuzzyQuery:
		if field, source := locate(q.FieldVal); source != "" {
			if ferr := addFuzzy(source, field, []string{q.Term}, q.Fuzziness, q.Prefix, q.autoFuzzy); ferr != nil {
				return nil, ferr
			}
		}
	case *MatchQuery:
		if merr := addAnalyzed(q.Analyzer, q.FieldVal, q.Match, q.Fuzziness, q.Prefix, q.autoFuzzy); merr != nil {
			return nil, merr
		}
	case *MatchPhraseQuery:
		// Match phrase queries have no prefix length.
		if merr := addAnalyzed(q.Analyzer, q.FieldVal, q.MatchPhrase, q.Fuzziness, 0, q.autoFuzzy); merr != nil {
			return nil, merr
		}
	case *MultiPhraseQuery:
		if field, source := locate(q.FieldVal); source != "" {
			// Flatten the per-position alternatives into one list.
			var terms []string
			for _, group := range q.Terms {
				terms = append(terms, group...)
			}
			if perr := addFuzzy(source, field, terms, q.Fuzziness, 0, q.autoFuzzy); perr != nil {
				return nil, perr
			}
		}
	case *PhraseQuery:
		if field, source := locate(q.FieldVal); source != "" {
			if perr := addFuzzy(source, field, q.Terms, q.Fuzziness, 0, q.autoFuzzy); perr != nil {
				return nil, perr
			}
		}
	case *PrefixQuery:
		if field, source := locate(q.FieldVal); source != "" {
			rv, err = addSynonymsForTermWithMatchType(ctx, PrefixMatchType, source, field, q.Prefix, 0, 0, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *QueryStringQuery:
		expanded, xerr := expandQuery(m, q)
		if xerr != nil {
			return nil, xerr
		}
		rv, err = ExtractSynonyms(ctx, m, r, expanded, rv)
		if err != nil {
			return nil, err
		}
	case *TermQuery:
		if field, source := locate(q.FieldVal); source != "" {
			rv, err = addSynonymsForTerm(ctx, source, field, q.Term, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *RegexpQuery:
		if field, source := locate(q.FieldVal); source != "" {
			rv, err = addSynonymsForTermWithMatchType(ctx, RegexpMatchType, source, field, strings.TrimPrefix(q.Regexp, "^"), 0, 0, r, rv)
			if err != nil {
				return nil, err
			}
		}
	case *WildcardQuery:
		if field, source := locate(q.FieldVal); source != "" {
			rv, err = addSynonymsForTermWithMatchType(ctx, RegexpMatchType, source, field, wildcardRegexpReplacer.Replace(q.Wildcard), 0, 0, r, rv)
			if err != nil {
				return nil, err
			}
		}
	}
	return rv, nil
}
// addSynonymsForTermWithMatchType expands term into the thesaurus keys of src
// that match it under matchType (FuzzyMatchType, RegexpMatchType or
// PrefixMatchType) and adds the synonyms of every matching key to rv under
// field.
//
// For FuzzyMatchType a fuzziness of 0 degenerates to an exact lookup of term,
// and a fuzziness that is negative or larger than searcher.MaxFuzziness is
// rejected. fuzziness and prefix are only consulted for fuzzy matching. Any
// other matchType is an error.
//
// The results are named so that the deferred Close of the key iterator can
// report its error; a nil map is returned whenever err is non-nil.
func addSynonymsForTermWithMatchType(ctx context.Context, matchType int, src, field, term string, fuzziness, prefix int,
	r index.ThesaurusReader, rv search.FieldTermSynonymMap,
) (out search.FieldTermSynonymMap, err error) {
	// Resolve the iterator over matching thesaurus keys for the match type.
	var thesKeys index.ThesaurusKeys
	switch matchType {
	case FuzzyMatchType:
		// Zero fuzziness is an exact match; no key iteration is needed.
		if fuzziness == 0 {
			rv, err = addSynonymsForTerm(ctx, src, field, term, r, rv)
			if err != nil {
				return nil, err
			}
			return rv, nil
		}
		if fuzziness > searcher.MaxFuzziness {
			return nil, fmt.Errorf("fuzziness exceeds max (%d)", searcher.MaxFuzziness)
		}
		if fuzziness < 0 {
			return nil, fmt.Errorf("invalid fuzziness, negative")
		}
		// Keep the leading runes of term whose byte offset is below prefix.
		// NOTE(review): the comparison uses the byte offset, not a rune
		// count, so multi-byte runes shorten the prefix; left unchanged
		// here - confirm against the fuzzy searcher before altering it.
		prefixTerm := ""
		for i, ch := range term {
			if i >= prefix {
				break
			}
			prefixTerm += string(ch)
		}
		thesKeys, err = r.ThesaurusKeysFuzzy(src, term, fuzziness, prefixTerm)
	case RegexpMatchType:
		thesKeys, err = r.ThesaurusKeysRegexp(src, term)
	case PrefixMatchType:
		thesKeys, err = r.ThesaurusKeysPrefix(src, []byte(term))
	default:
		return nil, fmt.Errorf("invalid match type: %d", matchType)
	}
	if err != nil {
		return nil, err
	}
	// err is a named result, so a Close failure is returned to the caller
	// instead of being written to a dead local.
	defer func() {
		if cerr := thesKeys.Close(); cerr != nil && err == nil {
			out, err = nil, cerr
		}
	}()

	// Drain the iterator first, then look up the synonyms of each key.
	var terms []string
	tfd, nextErr := thesKeys.Next()
	for nextErr == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		tfd, nextErr = thesKeys.Next()
	}
	if nextErr != nil {
		return nil, nextErr
	}
	for _, synTerm := range terms {
		rv, err = addSynonymsForTerm(ctx, src, field, synTerm, r, rv)
		if err != nil {
			return nil, err
		}
	}
	return rv, nil
}
// addSynonymsForTerm reads every synonym that the thesaurus src holds for
// term and, when there is at least one, stores the list in rv[field][term],
// allocating rv and the per-field map on demand. It returns the (possibly
// newly allocated) map.
//
// The results are named so that the deferred Close of the term reader can
// report its error; a nil map is returned whenever err is non-nil.
func addSynonymsForTerm(ctx context.Context, src, field, term string,
	r index.ThesaurusReader, rv search.FieldTermSynonymMap,
) (out search.FieldTermSynonymMap, err error) {
	termReader, err := r.ThesaurusTermReader(ctx, src, []byte(term))
	if err != nil {
		return nil, err
	}
	// err is a named result, so a Close failure is returned to the caller
	// instead of being written to a dead local.
	defer func() {
		if cerr := termReader.Close(); cerr != nil && err == nil {
			out, err = nil, cerr
		}
	}()

	// The reader signals exhaustion with an empty synonym.
	var synonyms []string
	synonym, nextErr := termReader.Next()
	for nextErr == nil && synonym != "" {
		synonyms = append(synonyms, synonym)
		synonym, nextErr = termReader.Next()
	}
	if nextErr != nil {
		return nil, nextErr
	}
	if len(synonyms) == 0 {
		return rv, nil
	}
	if rv == nil {
		rv = make(search.FieldTermSynonymMap)
	}
	if _, exists := rv[field]; !exists {
		rv[field] = make(map[string][]string)
	}
	rv[field][term] = synonyms
	return rv, nil
}
================================================
FILE: search/query/query_string.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// QueryStringQuery is a query expressed as a query string. The string is
// handed to parseQuerySyntax each time the query is parsed, validated or
// turned into a searcher.
type QueryStringQuery struct {
// Query is the raw query string.
Query string `json:"query"`
// BoostVal is the optional boost; it is set through SetBoost.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewQueryStringQuery returns a Query that finds the documents satisfying
// the given query string, written in the small human-oriented query
// language. No boost is set.
func NewQueryStringQuery(query string) *QueryStringQuery {
	q := new(QueryStringQuery)
	q.Query = query
	return q
}
// SetBoost stores b as the boost of this query, replacing any earlier value.
func (q *QueryStringQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost reports the boost of this query as computed by BoostVal.Value.
func (q *QueryStringQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// Parse converts the query string into the Query it describes, or returns
// the error reported by parseQuerySyntax.
func (q *QueryStringQuery) Parse() (Query, error) {
	parsed, err := parseQuerySyntax(q.Query)
	return parsed, err
}
// Searcher parses the query string and delegates to the Searcher of the
// resulting query. A parse failure is returned without building a searcher.
func (q *QueryStringQuery) Searcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping, options search.SearcherOptions) (search.Searcher, error) {
	parsed, parseErr := parseQuerySyntax(q.Query)
	if parseErr != nil {
		return nil, parseErr
	}
	return parsed.Searcher(ctx, i, m, options)
}
// Validate parses the query string and, when the parsed query implements
// ValidatableQuery, returns the result of its Validate method. A query that
// parses but is not validatable is considered valid.
func (q *QueryStringQuery) Validate() error {
	parsed, err := parseQuerySyntax(q.Query)
	if err != nil {
		return err
	}
	validatable, ok := parsed.(ValidatableQuery)
	if !ok {
		return nil
	}
	return validatable.Validate()
}
================================================
FILE: search/query/query_string.y
================================================
%{
package query
import (
"fmt"
"strconv"
"strings"
"time"
)
// logDebugGrammar writes a grammar trace line through logger.Printf, but
// only when debugParser is set. The grammar actions call it to trace which
// production was reduced.
func logDebugGrammar(format string, v ...interface{}) {
if debugParser {
logger.Printf(format, v...)
}
}
%}
/* Semantic value carried by tokens and non-terminals. The tag on each type
   declaration below selects which of these fields a symbol uses. */
%union {
s string
n int
f float64
q Query
pf *float64}

/* Terminals produced by the lexer. */
%token tSTRING tPHRASE tPLUS tMINUS tCOLON tBOOST tNUMBER tGREATER tLESS
tEQUAL tTILDE

/* Symbols whose value is a string (field s). */
%type <s>                tSTRING
%type <s>                tPHRASE
%type <s>                tNUMBER
%type <s>                posOrNegNumber
%type <s>                fieldName
%type <s>                tTILDE
%type <s>                tBOOST
/* searchBase yields the parsed Query, searchSuffix an optional boost
   (nil when absent) and searchPrefix one of queryShould, queryMust or
   queryMustNot. */
%type <q>                searchBase
%type <pf>               searchSuffix
%type <n>                searchPrefix
%%
/* input is the first rule of the grammar: a whole query string is a list
   of search parts. */
input:
searchParts {
logDebugGrammar("INPUT")
};
/* searchParts is a right-recursive list of one or more searchPart clauses.
   The actions only trace; each clause registers itself in searchPart. */
searchParts:
searchPart searchParts {
logDebugGrammar("SEARCH PARTS")
}
|
searchPart {
logDebugGrammar("SEARCH PART")
};
/* searchPart is one clause: an optional prefix, the query itself and an
   optional boost suffix. The clause is added to the query object held by
   the lexer wrapper through AddShould, AddMust or AddMustNot, according to
   the prefix value. */
searchPart:
searchPrefix searchBase searchSuffix {
query := $2
// a nil suffix means no boost was given; queries that do not implement
// BoostableQuery silently ignore the boost
if $3 != nil {
if query, ok := query.(BoostableQuery); ok {
query.SetBoost(*$3)
}
}
switch($1) {
case queryShould:
yylex.(*lexerWrapper).query.AddShould(query)
case queryMust:
yylex.(*lexerWrapper).query.AddMust(query)
case queryMustNot:
yylex.(*lexerWrapper).query.AddMustNot(query)
}
};
/* searchPrefix maps the optional leading sign of a clause to its role:
   nothing gives queryShould, tPLUS gives queryMust and tMINUS gives
   queryMustNot. */
searchPrefix:
/* empty */ {
$$ = queryShould
}
|
tPLUS {
logDebugGrammar("PLUS")
$$ = queryMust
}
|
tMINUS {
logDebugGrammar("MINUS")
$$ = queryMustNot
};
/* searchBase is the query at the core of one clause. Alternatives:
     tSTRING, field:tSTRING          regexp (wrapped in slashes), wildcard
                                     (contains * or ?) or match query
     tSTRING tTILDE, field:...       match query with fuzziness
     tNUMBER, field:number           match query OR inclusive numeric equality
     tPHRASE, field:tPHRASE          match phrase query
     field:> >= < <= number          numeric range query
     field:> >= < <= tPHRASE         date range query
   Conversion failures inside an action are reported through the lexer
   Error method; the action still yields a query.
   After changing an action, regenerate query_string.y.go with goyacc. */
searchBase:
tSTRING {
	str := $1
	logDebugGrammar("STRING - %s", str)
	var q FieldableQuery
	// A lone slash both starts and ends with a slash but has no pattern
	// between the delimiters; slicing it would panic, so at least two
	// bytes are required before treating the token as a regexp.
	if len(str) > 1 && strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
		q = NewRegexpQuery(str[1:len(str)-1])
	} else if strings.ContainsAny(str, "*?"){
		q = NewWildcardQuery(str)
	} else {
		q = NewMatchQuery(str)
	}
	$$ = q
}
|
tSTRING tTILDE {
	str := $1
	// the fuzziness is parsed as a float and truncated to an int
	fuzziness, err := strconv.ParseFloat($2, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
	}
	logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
	q := NewMatchQuery(str)
	q.SetFuzziness(int(fuzziness))
	$$ = q
}
|
fieldName tCOLON tSTRING tTILDE {
	field := $1
	str := $3
	// the fuzziness is parsed as a float and truncated to an int
	fuzziness, err := strconv.ParseFloat($4, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
	}
	logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
	q := NewMatchQuery(str)
	q.SetFuzziness(int(fuzziness))
	q.SetField(field)
	$$ = q
}
|
tNUMBER {
	str := $1
	logDebugGrammar("STRING - %s", str)
	// a bare number is searched both as text and as a numeric value
	q1 := NewMatchQuery(str)
	val, err := strconv.ParseFloat($1, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
	}
	inclusive := true
	q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
	q := NewDisjunctionQuery([]Query{q1,q2})
	q.queryStringMode = true
	$$ = q
}
|
tPHRASE {
	phrase := $1
	logDebugGrammar("PHRASE - %s", phrase)
	q := NewMatchPhraseQuery(phrase)
	$$ = q
}
|
fieldName tCOLON tSTRING {
	field := $1
	str := $3
	logDebugGrammar("FIELD - %s STRING - %s", field, str)
	var q FieldableQuery
	// see the unfielded tSTRING alternative: a lone slash is not a regexp
	if len(str) > 1 && strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
		q = NewRegexpQuery(str[1:len(str)-1])
	} else if strings.ContainsAny(str, "*?"){
		q = NewWildcardQuery(str)
	} else {
		q = NewMatchQuery(str)
	}
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON posOrNegNumber {
	field := $1
	str := $3
	logDebugGrammar("FIELD - %s STRING - %s", field, str)
	// a fielded number is searched both as text and as a numeric value
	q1 := NewMatchQuery(str)
	q1.SetField(field)
	val, err := strconv.ParseFloat($3, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
	}
	inclusive := true
	q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
	q2.SetField(field)
	q := NewDisjunctionQuery([]Query{q1,q2})
	q.queryStringMode = true
	$$ = q
}
|
fieldName tCOLON tPHRASE {
	field := $1
	phrase := $3
	logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
	q := NewMatchPhraseQuery(phrase)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tGREATER posOrNegNumber {
	field := $1
	min, err := strconv.ParseFloat($4, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
	}
	minInclusive := false
	logDebugGrammar("FIELD - GREATER THAN %f", min)
	q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tGREATER tEQUAL posOrNegNumber {
	field := $1
	min, err := strconv.ParseFloat($5, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
	}
	minInclusive := true
	logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
	q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tLESS posOrNegNumber {
	field := $1
	max, err := strconv.ParseFloat($4, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
	}
	maxInclusive := false
	logDebugGrammar("FIELD - LESS THAN %f", max)
	q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tLESS tEQUAL posOrNegNumber {
	field := $1
	max, err := strconv.ParseFloat($5, 64)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
	}
	maxInclusive := true
	logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
	q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tGREATER tPHRASE {
	field := $1
	minInclusive := false
	phrase := $4
	logDebugGrammar("FIELD - GREATER THAN DATE %s", phrase)
	// a quoted operand after a comparison is interpreted as a time
	minTime, err := queryTimeFromString(phrase)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
	}
	q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tGREATER tEQUAL tPHRASE {
	field := $1
	minInclusive := true
	phrase := $5
	logDebugGrammar("FIELD - GREATER THAN OR EQUAL DATE %s", phrase)
	minTime, err := queryTimeFromString(phrase)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
	}
	q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tLESS tPHRASE {
	field := $1
	maxInclusive := false
	phrase := $4
	logDebugGrammar("FIELD - LESS THAN DATE %s", phrase)
	maxTime, err := queryTimeFromString(phrase)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
	}
	q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
	q.SetField(field)
	$$ = q
}
|
fieldName tCOLON tLESS tEQUAL tPHRASE {
	field := $1
	maxInclusive := true
	phrase := $5
	logDebugGrammar("FIELD - LESS THAN OR EQUAL DATE %s", phrase)
	maxTime, err := queryTimeFromString(phrase)
	if err != nil {
		yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
	}
	q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
	q.SetField(field)
	$$ = q
};
/* searchSuffix is the optional boost after a clause. Its value is nil when
   no boost is present or when the boost text is not a valid float, in which
   case the failure is also reported through the lexer Error method. */
searchSuffix:
/* empty */ {
$$ = nil
}
|
tBOOST {
$$ = nil
boost, err := strconv.ParseFloat($1, 64)
if err != nil {
yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
} else {
$$ = &boost
}
logDebugGrammar("BOOST %f", boost)
};
/* posOrNegNumber is the text of a number with an optional leading minus;
   the sign arrives as a separate tMINUS token and is glued back on here. */
posOrNegNumber:
tNUMBER {
$$ = $1
}
|
tMINUS tNUMBER {
$$ = "-" + $2
};
/* fieldName is the text in front of a colon; it may be written either as a
   quoted phrase or as a plain string. */
fieldName:
tPHRASE {
$$ = $1
}
|
tSTRING {
$$ = $1
};
================================================
FILE: search/query/query_string.y.go
================================================
// Code generated by goyacc -o query_string.y.go query_string.y. DO NOT EDIT.
//line query_string.y:2
package query
import __yyfmt__ "fmt"
//line query_string.y:2
import (
"fmt"
"strconv"
"strings"
"time"
)
// logDebugGrammar writes a grammar trace line through logger.Printf, but
// only when debugParser is set. It is copied here from the prologue of
// query_string.y.
func logDebugGrammar(format string, v ...interface{}) {
if debugParser {
logger.Printf(format, v...)
}
}
//line query_string.y:17
// yySymType is the semantic value attached to every entry of the parser
// stack. yys holds the parser state; the grammar actions below read s for
// string-valued symbols, n for the search prefix, q for a parsed Query and
// pf for an optional boost.
type yySymType struct {
yys int
s string
n int
f float64
q Query
pf *float64
}
const tSTRING = 57346
const tPHRASE = 57347
const tPLUS = 57348
const tMINUS = 57349
const tCOLON = 57350
const tBOOST = 57351
const tNUMBER = 57352
const tGREATER = 57353
const tLESS = 57354
const tEQUAL = 57355
const tTILDE = 57356
var yyToknames = [...]string{
"$end",
"error",
"$unk",
"tSTRING",
"tPHRASE",
"tPLUS",
"tMINUS",
"tCOLON",
"tBOOST",
"tNUMBER",
"tGREATER",
"tLESS",
"tEQUAL",
"tTILDE",
}
var yyStatenames = [...]string{}
const yyEofCode = 1
const yyErrCode = 2
const yyInitialStackSize = 16
//line yacctab:1
var yyExca = [...]int{
-1, 1,
1, -1,
-2, 0,
-1, 3,
1, 3,
-2, 5,
-1, 9,
8, 29,
-2, 8,
-1, 12,
8, 28,
-2, 12,
}
const yyPrivate = 57344
const yyLast = 43
var yyAct = [...]int{
18, 17, 19, 24, 23, 15, 31, 22, 20, 21,
30, 27, 23, 23, 3, 22, 22, 14, 29, 26,
16, 25, 28, 35, 33, 23, 23, 32, 22, 22,
34, 9, 12, 1, 5, 6, 2, 11, 4, 13,
7, 8, 10,
}
var yyPact = [...]int{
28, -1000, -1000, 28, 27, -1000, -1000, -1000, 8, -9,
12, -1000, -1000, -1000, -1000, -1000, -3, -11, -1000, -1000,
6, 5, -1000, -4, -1000, -1000, 19, -1000, -1000, 18,
-1000, -1000, -1000, -1000, -1000, -1000,
}
var yyPgo = [...]int{
0, 0, 42, 41, 39, 38, 33, 36, 14,
}
var yyR1 = [...]int{
0, 6, 7, 7, 8, 5, 5, 5, 3, 3,
3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
3, 3, 3, 3, 4, 4, 1, 1, 2, 2,
}
var yyR2 = [...]int{
0, 1, 2, 1, 3, 0, 1, 1, 1, 2,
4, 1, 1, 3, 3, 3, 4, 5, 4, 5,
4, 5, 4, 5, 0, 1, 1, 2, 1, 1,
}
var yyChk = [...]int{
-1000, -6, -7, -8, -5, 6, 7, -7, -3, 4,
-2, 10, 5, -4, 9, 14, 8, 4, -1, 5,
11, 12, 10, 7, 14, -1, 13, 5, -1, 13,
5, 10, -1, 5, -1, 5,
}
var yyDef = [...]int{
5, -2, 1, -2, 0, 6, 7, 2, 24, -2,
0, 11, -2, 4, 25, 9, 0, 13, 14, 15,
0, 0, 26, 0, 10, 16, 0, 20, 18, 0,
22, 27, 17, 21, 19, 23,
}
var yyTok1 = [...]int{
1,
}
var yyTok2 = [...]int{
2, 3, 4, 5, 6, 7, 8, 9, 10, 11,
12, 13, 14,
}
var yyTok3 = [...]int{
0,
}
var yyErrorMessages = [...]struct {
state int
token int
msg string
}{}
//line yaccpar:1
/* parser for yacc output */
var (
yyDebug = 0
yyErrorVerbose = false
)
// yyLexer is what the generated parser needs from a lexer: Lex fills lval
// and returns the next token code, Error receives parse error messages.
type yyLexer interface {
Lex(lval *yySymType) int
Error(s string)
}
// yyParser is the interface of the generated parser: Parse consumes a lexer
// and returns a status code, Lookahead exposes the current lookahead char.
type yyParser interface {
Parse(yyLexer) int
Lookahead() int
}
// yyParserImpl is the yyParser implementation returned by yyNewParser.
type yyParserImpl struct {
// lval receives the semantic value of the token most recently lexed.
lval yySymType
// stack is the initial backing array of the parse stack; Parse switches to
// a larger slice when it fills up.
stack [yyInitialStackSize]yySymType
// char is the lookahead char as returned by the lexer, or -1 when none is
// held.
char int
}
// Lookahead returns the lookahead char currently held by the parser; Parse
// sets it to -1 when no lookahead is held and when it returns.
func (p *yyParserImpl) Lookahead() int {
return p.char
}
// yyNewParser returns a fresh parser with a zero-valued state.
func yyNewParser() yyParser {
return &yyParserImpl{}
}
const yyFlag = -1000
// yyTokname returns the name of internal token number c, taken from
// yyToknames (indexed from c-1), or "tok-<c>" when c is out of range or the
// entry is empty.
func yyTokname(c int) string {
if c >= 1 && c-1 < len(yyToknames) {
if yyToknames[c-1] != "" {
return yyToknames[c-1]
}
}
return __yyfmt__.Sprintf("tok-%v", c)
}
// yyStatname returns the name of parser state s from yyStatenames, or
// "state-<s>" when s is out of range or the entry is empty.
func yyStatname(s int) string {
if s >= 0 && s < len(yyStatenames) {
if yyStatenames[s] != "" {
return yyStatenames[s]
}
}
return __yyfmt__.Sprintf("state-%v", s)
}
// yyErrorMessage builds the message passed to the lexer's Error method when
// the parser hits a syntax error in the given state with the given
// lookahead token.
//
// Unless yyErrorVerbose is set the result is just "syntax error". Otherwise
// a custom entry of yyErrorMessages for (state, lookAhead) wins; failing
// that the message names the unexpected token and, when there are at most
// four candidates, the tokens that would have been accepted.
func yyErrorMessage(state, lookAhead int) string {
// first internal token number that is considered when listing expectations
const TOKSTART = 4
if !yyErrorVerbose {
return "syntax error"
}
for _, e := range yyErrorMessages {
if e.state == state && e.token == lookAhead {
return "syntax error: " + e.msg
}
}
res := "syntax error: unexpected " + yyTokname(lookAhead)
// To match Bison, suggest at most four expected tokens.
expected := make([]int, 0, 4)
// Look for shiftable tokens.
base := yyPact[state]
for tok := TOKSTART; tok-1 < len(yyToknames); tok++ {
if n := base + tok; n >= 0 && n < yyLast && yyChk[yyAct[n]] == tok {
// a fifth candidate: give up on listing expectations
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
}
if yyDef[state] == -2 {
// locate this state's section in the exception table
i := 0
for yyExca[i] != -1 || yyExca[i+1] != state {
i += 2
}
// Look for tokens that we accept or reduce.
for i += 2; yyExca[i] >= 0; i += 2 {
tok := yyExca[i]
if tok < TOKSTART || yyExca[i+1] == 0 {
continue
}
if len(expected) == cap(expected) {
return res
}
expected = append(expected, tok)
}
// If the default action is to accept or reduce, give up.
if yyExca[i+1] != 0 {
return res
}
}
for i, tok := range expected {
if i == 0 {
res += ", expecting "
} else {
res += " or "
}
res += yyTokname(tok)
}
return res
}
// yylex1 fetches the next token from lex and translates the lexer's char
// code into the parser's internal token numbering using yyTok1, yyTok2 and
// yyTok3. It returns both the raw char and the translated token.
//
// A char of zero or less maps to yyTok1[0]; a char that no table knows maps
// to yyTok2[1], the unknown-char token.
func yylex1(lex yyLexer, lval *yySymType) (char, token int) {
token = 0
char = lex.Lex(lval)
if char <= 0 {
token = yyTok1[0]
goto out
}
if char < len(yyTok1) {
token = yyTok1[char]
goto out
}
// codes from yyPrivate upwards index yyTok2
if char >= yyPrivate {
if char < yyPrivate+len(yyTok2) {
token = yyTok2[char-yyPrivate]
goto out
}
}
// yyTok3 is a list of (char, token) pairs
for i := 0; i < len(yyTok3); i += 2 {
token = yyTok3[i+0]
if token == char {
token = yyTok3[i+1]
goto out
}
}
out:
if token == 0 {
token = yyTok2[1] /* unknown char */
}
if yyDebug >= 3 {
__yyfmt__.Printf("lex %s(%d)\n", yyTokname(token), uint(char))
}
return char, token
}
// yyParse parses the token stream of yylex with a fresh parser and returns
// that parser's Parse result.
func yyParse(yylex yyLexer) int {
return yyNewParser().Parse(yylex)
}
// Parse runs the table-driven parser generated by goyacc from
// query_string.y. It pulls tokens from yylex through yylex1, shifts and
// reduces according to the yyPact, yyAct, yyChk, yyDef, yyExca, yyR1, yyR2
// and yyPgo tables, and executes the grammar action of each reduced
// production in the switch at the end of the function.
//
// It returns 0 when the input is accepted and 1 when error recovery gives
// up. Syntax errors are reported through yylex.Error.
//
// NOTE: this function is generated. The only hand-made change is the
// len(str) > 1 guard in cases 8 and 13; the same guard has to be present in
// the actions of query_string.y or it is lost on regeneration.
func (yyrcvr *yyParserImpl) Parse(yylex yyLexer) int {
	var yyn int
	var yyVAL yySymType
	var yyDollar []yySymType
	_ = yyDollar // silence set and not used
	yyS := yyrcvr.stack[:]

	Nerrs := 0   /* number of errors */
	Errflag := 0 /* error recovery flag */
	yystate := 0
	yyrcvr.char = -1
	yytoken := -1 // yyrcvr.char translated into internal numbering
	defer func() {
		// Make sure we report no lookahead when not parsing.
		yystate = -1
		yyrcvr.char = -1
		yytoken = -1
	}()
	yyp := -1
	goto yystack

ret0:
	return 0

ret1:
	return 1

yystack:
	/* put a state and value onto the stack */
	if yyDebug >= 4 {
		__yyfmt__.Printf("char %v in %v\n", yyTokname(yytoken), yyStatname(yystate))
	}

	yyp++
	if yyp >= len(yyS) {
		nyys := make([]yySymType, len(yyS)*2)
		copy(nyys, yyS)
		yyS = nyys
	}
	yyS[yyp] = yyVAL
	yyS[yyp].yys = yystate

yynewstate:
	yyn = yyPact[yystate]
	if yyn <= yyFlag {
		goto yydefault /* simple state */
	}
	if yyrcvr.char < 0 {
		yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
	}
	yyn += yytoken
	if yyn < 0 || yyn >= yyLast {
		goto yydefault
	}
	yyn = yyAct[yyn]
	if yyChk[yyn] == yytoken { /* valid shift */
		yyrcvr.char = -1
		yytoken = -1
		yyVAL = yyrcvr.lval
		yystate = yyn
		if Errflag > 0 {
			Errflag--
		}
		goto yystack
	}

yydefault:
	/* default state action */
	yyn = yyDef[yystate]
	if yyn == -2 {
		if yyrcvr.char < 0 {
			yyrcvr.char, yytoken = yylex1(yylex, &yyrcvr.lval)
		}

		/* look through exception table */
		xi := 0
		for {
			if yyExca[xi+0] == -1 && yyExca[xi+1] == yystate {
				break
			}
			xi += 2
		}
		for xi += 2; ; xi += 2 {
			yyn = yyExca[xi+0]
			if yyn < 0 || yyn == yytoken {
				break
			}
		}
		yyn = yyExca[xi+1]
		if yyn < 0 {
			goto ret0
		}
	}
	if yyn == 0 {
		/* error ... attempt to resume parsing */
		switch Errflag {
		case 0: /* brand new error */
			yylex.Error(yyErrorMessage(yystate, yytoken))
			Nerrs++
			if yyDebug >= 1 {
				__yyfmt__.Printf("%s", yyStatname(yystate))
				__yyfmt__.Printf(" saw %s\n", yyTokname(yytoken))
			}
			fallthrough

		case 1, 2: /* incompletely recovered error ... try again */
			Errflag = 3

			/* find a state where "error" is a legal shift action */
			for yyp >= 0 {
				yyn = yyPact[yyS[yyp].yys] + yyErrCode
				if yyn >= 0 && yyn < yyLast {
					yystate = yyAct[yyn] /* simulate a shift of "error" */
					if yyChk[yystate] == yyErrCode {
						goto yystack
					}
				}

				/* the current p has no shift on "error", pop stack */
				if yyDebug >= 2 {
					__yyfmt__.Printf("error recovery pops state %d\n", yyS[yyp].yys)
				}
				yyp--
			}
			/* there is no state on the stack with an error shift ... abort */
			goto ret1

		case 3: /* no shift yet; clobber input char */
			if yyDebug >= 2 {
				__yyfmt__.Printf("error recovery discards %s\n", yyTokname(yytoken))
			}
			if yytoken == yyEofCode {
				goto ret1
			}
			yyrcvr.char = -1
			yytoken = -1
			goto yynewstate /* try again in the same state */
		}
	}

	/* reduction by production yyn */
	if yyDebug >= 2 {
		__yyfmt__.Printf("reduce %v in:\n\t%v\n", yyn, yyStatname(yystate))
	}

	yynt := yyn
	yypt := yyp
	_ = yypt // guard against "declared and not used"

	yyp -= yyR2[yyn]
	// yyp is now the index of $0. Perform the default action. Iff the
	// reduced production is ε, $1 is possibly out of range.
	if yyp+1 >= len(yyS) {
		nyys := make([]yySymType, len(yyS)*2)
		copy(nyys, yyS)
		yyS = nyys
	}
	yyVAL = yyS[yyp+1]

	/* consult goto table to find next state */
	yyn = yyR1[yyn]
	yyg := yyPgo[yyn]
	yyj := yyg + yyS[yyp].yys + 1

	if yyj >= yyLast {
		yystate = yyAct[yyg]
	} else {
		yystate = yyAct[yyj]
		if yyChk[yystate] != -yyn {
			yystate = yyAct[yyg]
		}
	}
	// dummy call; replaced with literal code
	switch yynt {

	case 1:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:41
		{
			logDebugGrammar("INPUT")
		}
	case 2:
		yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:46
		{
			logDebugGrammar("SEARCH PARTS")
		}
	case 3:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:50
		{
			logDebugGrammar("SEARCH PART")
		}
	case 4:
		yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:55
		{
			query := yyDollar[2].q
			if yyDollar[3].pf != nil {
				if query, ok := query.(BoostableQuery); ok {
					query.SetBoost(*yyDollar[3].pf)
				}
			}
			switch yyDollar[1].n {
			case queryShould:
				yylex.(*lexerWrapper).query.AddShould(query)
			case queryMust:
				yylex.(*lexerWrapper).query.AddMust(query)
			case queryMustNot:
				yylex.(*lexerWrapper).query.AddMustNot(query)
			}
		}
	case 5:
		yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:74
		{
			yyVAL.n = queryShould
		}
	case 6:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:78
		{
			logDebugGrammar("PLUS")
			yyVAL.n = queryMust
		}
	case 7:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:83
		{
			logDebugGrammar("MINUS")
			yyVAL.n = queryMustNot
		}
	case 8:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:89
		{
			str := yyDollar[1].s
			logDebugGrammar("STRING - %s", str)
			var q FieldableQuery
			// A lone slash both starts and ends with a slash; slicing it
			// as a regexp would panic, so require at least two bytes.
			if len(str) > 1 && strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
				q = NewRegexpQuery(str[1 : len(str)-1])
			} else if strings.ContainsAny(str, "*?") {
				q = NewWildcardQuery(str)
			} else {
				q = NewMatchQuery(str)
			}
			yyVAL.q = q
		}
	case 9:
		yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:103
		{
			str := yyDollar[1].s
			fuzziness, err := strconv.ParseFloat(yyDollar[2].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
			}
			logDebugGrammar("FUZZY STRING - %s %f", str, fuzziness)
			q := NewMatchQuery(str)
			q.SetFuzziness(int(fuzziness))
			yyVAL.q = q
		}
	case 10:
		yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:115
		{
			field := yyDollar[1].s
			str := yyDollar[3].s
			fuzziness, err := strconv.ParseFloat(yyDollar[4].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid fuzziness value: %v", err))
			}
			logDebugGrammar("FIELD - %s FUZZY STRING - %s %f", field, str, fuzziness)
			q := NewMatchQuery(str)
			q.SetFuzziness(int(fuzziness))
			q.SetField(field)
			yyVAL.q = q
		}
	case 11:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:129
		{
			str := yyDollar[1].s
			logDebugGrammar("STRING - %s", str)
			q1 := NewMatchQuery(str)
			val, err := strconv.ParseFloat(yyDollar[1].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
			}
			inclusive := true
			q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
			q := NewDisjunctionQuery([]Query{q1, q2})
			q.queryStringMode = true
			yyVAL.q = q
		}
	case 12:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:144
		{
			phrase := yyDollar[1].s
			logDebugGrammar("PHRASE - %s", phrase)
			q := NewMatchPhraseQuery(phrase)
			yyVAL.q = q
		}
	case 13:
		yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:151
		{
			field := yyDollar[1].s
			str := yyDollar[3].s
			logDebugGrammar("FIELD - %s STRING - %s", field, str)
			var q FieldableQuery
			// Same guard as case 8: a lone slash is not a regexp.
			if len(str) > 1 && strings.HasPrefix(str, "/") && strings.HasSuffix(str, "/") {
				q = NewRegexpQuery(str[1 : len(str)-1])
			} else if strings.ContainsAny(str, "*?") {
				q = NewWildcardQuery(str)
			} else {
				q = NewMatchQuery(str)
			}
			q.SetField(field)
			yyVAL.q = q
		}
	case 14:
		yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:167
		{
			field := yyDollar[1].s
			str := yyDollar[3].s
			logDebugGrammar("FIELD - %s STRING - %s", field, str)
			q1 := NewMatchQuery(str)
			q1.SetField(field)
			val, err := strconv.ParseFloat(yyDollar[3].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
			}
			inclusive := true
			q2 := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
			q2.SetField(field)
			q := NewDisjunctionQuery([]Query{q1, q2})
			q.queryStringMode = true
			yyVAL.q = q
		}
	case 15:
		yyDollar = yyS[yypt-3 : yypt+1]
//line query_string.y:185
		{
			field := yyDollar[1].s
			phrase := yyDollar[3].s
			logDebugGrammar("FIELD - %s PHRASE - %s", field, phrase)
			q := NewMatchPhraseQuery(phrase)
			q.SetField(field)
			yyVAL.q = q
		}
	case 16:
		yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:194
		{
			field := yyDollar[1].s
			min, err := strconv.ParseFloat(yyDollar[4].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
			}
			minInclusive := false
			logDebugGrammar("FIELD - GREATER THAN %f", min)
			q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
			q.SetField(field)
			yyVAL.q = q
		}
	case 17:
		yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:207
		{
			field := yyDollar[1].s
			min, err := strconv.ParseFloat(yyDollar[5].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
			}
			minInclusive := true
			logDebugGrammar("FIELD - GREATER THAN OR EQUAL %f", min)
			q := NewNumericRangeInclusiveQuery(&min, nil, &minInclusive, nil)
			q.SetField(field)
			yyVAL.q = q
		}
	case 18:
		yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:220
		{
			field := yyDollar[1].s
			max, err := strconv.ParseFloat(yyDollar[4].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
			}
			maxInclusive := false
			logDebugGrammar("FIELD - LESS THAN %f", max)
			q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
			q.SetField(field)
			yyVAL.q = q
		}
	case 19:
		yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:233
		{
			field := yyDollar[1].s
			max, err := strconv.ParseFloat(yyDollar[5].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("error parsing number: %v", err))
			}
			maxInclusive := true
			logDebugGrammar("FIELD - LESS THAN OR EQUAL %f", max)
			q := NewNumericRangeInclusiveQuery(nil, &max, nil, &maxInclusive)
			q.SetField(field)
			yyVAL.q = q
		}
	case 20:
		yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:246
		{
			field := yyDollar[1].s
			minInclusive := false
			phrase := yyDollar[4].s

			logDebugGrammar("FIELD - GREATER THAN DATE %s", phrase)
			minTime, err := queryTimeFromString(phrase)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
			}
			q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
			q.SetField(field)
			yyVAL.q = q
		}
	case 21:
		yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:261
		{
			field := yyDollar[1].s
			minInclusive := true
			phrase := yyDollar[5].s

			logDebugGrammar("FIELD - GREATER THAN OR EQUAL DATE %s", phrase)
			minTime, err := queryTimeFromString(phrase)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
			}
			q := NewDateRangeInclusiveQuery(minTime, time.Time{}, &minInclusive, nil)
			q.SetField(field)
			yyVAL.q = q
		}
	case 22:
		yyDollar = yyS[yypt-4 : yypt+1]
//line query_string.y:276
		{
			field := yyDollar[1].s
			maxInclusive := false
			phrase := yyDollar[4].s

			logDebugGrammar("FIELD - LESS THAN DATE %s", phrase)
			maxTime, err := queryTimeFromString(phrase)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
			}
			q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
			q.SetField(field)
			yyVAL.q = q
		}
	case 23:
		yyDollar = yyS[yypt-5 : yypt+1]
//line query_string.y:291
		{
			field := yyDollar[1].s
			maxInclusive := true
			phrase := yyDollar[5].s

			logDebugGrammar("FIELD - LESS THAN OR EQUAL DATE %s", phrase)
			maxTime, err := queryTimeFromString(phrase)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid time: %v", err))
			}
			q := NewDateRangeInclusiveQuery(time.Time{}, maxTime, nil, &maxInclusive)
			q.SetField(field)
			yyVAL.q = q
		}
	case 24:
		yyDollar = yyS[yypt-0 : yypt+1]
//line query_string.y:307
		{
			yyVAL.pf = nil
		}
	case 25:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:311
		{
			yyVAL.pf = nil
			boost, err := strconv.ParseFloat(yyDollar[1].s, 64)
			if err != nil {
				yylex.(*lexerWrapper).lex.Error(fmt.Sprintf("invalid boost value: %v", err))
			} else {
				yyVAL.pf = &boost
			}
			logDebugGrammar("BOOST %f", boost)
		}
	case 26:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:323
		{
			yyVAL.s = yyDollar[1].s
		}
	case 27:
		yyDollar = yyS[yypt-2 : yypt+1]
//line query_string.y:327
		{
			yyVAL.s = "-" + yyDollar[2].s
		}
	case 28:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:332
		{
			yyVAL.s = yyDollar[1].s
		}
	case 29:
		yyDollar = yyS[yypt-1 : yypt+1]
//line query_string.y:336
		{
			yyVAL.s = yyDollar[1].s
		}
	}
	goto yystack /* stack new state and value */
}
================================================
FILE: search/query/query_string_lex.go
================================================
// Copyright (c) 2016 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"bufio"
"io"
"strings"
"unicode"
)
const reservedChars = "+-=&|>', '<', '=':
l.buf += string(next)
return singleCharOpState, true
case '^':
return inBoostState, true
case '~':
return inTildeState, true
}
switch {
case !l.inEscape && next == '\\':
l.inEscape = true
return startState, true
case unicode.IsDigit(next):
l.buf += string(next)
return inNumOrStrState, true
case !unicode.IsSpace(next):
l.buf += string(next)
return inStrState, true
}
// doesn't look like anything, just eat it and stay here
l.reset()
return startState, true
}
// inPhraseState accumulates the text of a quoted phrase. An unescaped
// double quote closes the phrase and emits a tPHRASE token; a backslash
// escapes the following rune. Reaching end of input inside the phrase is
// reported as an error and stops the lexer.
func inPhraseState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	if eof {
		// the closing quote never arrived
		l.Error("unterminated quote")
		return nil, false
	}

	switch {
	case l.inEscape:
		// the rune following a backslash is taken through unescape
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '"':
		// closing quote: hand the buffered text over as a phrase
		l.nextTokenType = tPHRASE
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("PHRASE - '%s'", l.nextToken.s)
		l.reset()
		return startState, true
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inPhraseState, true
}
// singleCharOpState turns the single operator character held in the buffer
// into its token type. The token carries no value. The rune passed in is
// not consumed, so the start state sees it again.
func singleCharOpState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	l.nextToken = new(yySymType)

	switch op := l.buf; op {
	case "+":
		l.nextTokenType = tPLUS
		logDebugTokens("PLUS")
	case "-":
		l.nextTokenType = tMINUS
		logDebugTokens("MINUS")
	case ":":
		l.nextTokenType = tCOLON
		logDebugTokens("COLON")
	case ">":
		l.nextTokenType = tGREATER
		logDebugTokens("GREATER")
	case "<":
		l.nextTokenType = tLESS
		logDebugTokens("LESS")
	case "=":
		l.nextTokenType = tEQUAL
		logDebugTokens("EQUAL")
	}

	l.reset()
	return startState, false
}
// inBoostState collects the text of a boost value. The value ends at an
// unescaped space or at end of input and is emitted as a tBOOST token; an
// empty value is emitted as "1". A backslash escapes the following rune.
func inBoostState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	finished := eof || (!l.inEscape && next == ' ')
	if !finished {
		switch {
		case l.inEscape:
			// the rune following a backslash is taken through unescape
			l.inEscape = false
			l.buf += unescape(string(next))
		case next == '\\':
			l.inEscape = true
		default:
			l.buf += string(next)
		}
		return inBoostState, true
	}

	// no digits at all: fall back to "1"
	if l.buf == "" {
		l.buf = "1"
	}
	l.nextTokenType = tBOOST
	l.nextToken = &yySymType{s: l.buf}
	logDebugTokens("BOOST - '%s'", l.nextToken.s)
	l.reset()
	return startState, true
}
// inTildeState collects the text of a tilde (fuzziness) value. The value
// ends at an unescaped space or at end of input and is emitted as a tTILDE
// token; an empty value is emitted as "1". A backslash escapes the
// following rune.
func inTildeState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	finished := eof || (!l.inEscape && next == ' ')
	if !finished {
		switch {
		case l.inEscape:
			// the rune following a backslash is taken through unescape
			l.inEscape = false
			l.buf += unescape(string(next))
		case next == '\\':
			l.inEscape = true
		default:
			l.buf += string(next)
		}
		return inTildeState, true
	}

	// no digits at all: fall back to "1"
	if l.buf == "" {
		l.buf = "1"
	}
	l.nextTokenType = tTILDE
	l.nextToken = &yySymType{s: l.buf}
	logDebugTokens("TILDE - '%s'", l.nextToken.s)
	l.reset()
	return startState, true
}
// inNumOrStrState handles text that so far looks like a number: digits with
// at most one dot. It emits a tNUMBER token at end of input or at an
// unescaped space, colon, caret or tilde. Any escape sequence, a second dot
// or any other rune hands the buffered text over to inStrState.
func inNumOrStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	delimiter := !l.inEscape && (next == ' ' || next == ':' || next == '^' || next == '~')
	if eof || delimiter {
		l.nextTokenType = tNUMBER
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("NUMBER - '%s'", l.nextToken.s)
		l.reset()
		// only a space (or end of input) is swallowed; the other
		// delimiters start the next token and must be seen again
		return startState, eof || next == ' '
	}

	switch {
	case l.inEscape:
		// an escaped rune, whether or not unescape changes it, can never
		// be part of a number, so continue as a string
		l.inEscape = false
		l.buf += unescape(string(next))
		return inStrState, true
	case next == '\\':
		l.inEscape = true
		return inNumOrStrState, true
	case next == '.' && !l.seenDot:
		// the first dot keeps the text numeric
		l.seenDot = true
		l.buf += string(next)
		return inNumOrStrState, true
	case unicode.IsDigit(next):
		l.buf += string(next)
		return inNumOrStrState, true
	}

	// anything else means this is not a number after all
	l.buf += string(next)
	return inStrState, true
}
// inStrState accumulates a plain string term.
//
// The term ends at eof or at an unescaped space, ':', '^' or '~', producing a
// tSTRING token. A terminating space (or eof) is consumed; ':', '^' and '~'
// are left unconsumed so that startState sees them. Otherwise a backslash
// opens an escape, the rune after a backslash is passed through unescape and
// appended, and any other rune is appended verbatim.
func inStrState(l *queryStringLex, next rune, eof bool) (lexState, bool) {
	delimiter := next == ' ' || next == ':' || next == '^' || next == '~'
	if eof || (!l.inEscape && delimiter) {
		// end of the string
		l.nextTokenType = tSTRING
		l.nextToken = &yySymType{s: l.buf}
		logDebugTokens("STRING - '%s'", l.nextToken.s)
		l.reset()
		// a non-eof rune here is a delimiter; only the space is swallowed
		return startState, eof || next == ' '
	}

	switch {
	case l.inEscape:
		// the escape only covers this one rune
		l.inEscape = false
		l.buf += unescape(string(next))
	case next == '\\':
		l.inEscape = true
	default:
		l.buf += string(next)
	}
	return inStrState, true
}
// logDebugTokens writes a printf-style trace line through the package logger,
// but only while debugLexer is switched on.
func logDebugTokens(format string, v ...interface{}) {
	if !debugLexer {
		return
	}
	logger.Printf(format, v...)
}
================================================
FILE: search/query/query_string_lex_test.go
================================================
package query
import (
"reflect"
"strings"
"testing"
)
func TestLexer(t *testing.T) {
tests := []struct {
input string
tokens []token
}{
{
input: "test",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "test",
},
},
},
},
{
input: "127.0.0.1",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "127.0.0.1",
},
},
},
},
{
input: `"test phrase 1"`,
tokens: []token{
{
typ: tPHRASE,
lval: yySymType{
s: "test phrase 1",
},
},
},
},
{
input: "field:test",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: "test",
},
},
},
},
{
input: "field:t-est",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: "t-est",
},
},
},
},
{
input: "field:t+est",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: "t+est",
},
},
},
},
{
input: "field:t>est",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: "t>est",
},
},
},
},
{
input: "field:t5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tGREATER,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:>=5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tGREATER,
},
{
typ: tEQUAL,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:<5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tLESS,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:<=5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tLESS,
},
{
typ: tEQUAL,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: "field:-5",
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tMINUS,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:>-5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tGREATER,
},
{
typ: tMINUS,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:>=-5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tGREATER,
},
{
typ: tEQUAL,
},
{
typ: tMINUS,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:<-5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tLESS,
},
{
typ: tMINUS,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:<=-5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tLESS,
},
{
typ: tEQUAL,
},
{
typ: tMINUS,
},
{
typ: tNUMBER,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `field:>"2006-01-02T15:04:05Z"`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tGREATER,
},
{
typ: tPHRASE,
lval: yySymType{
s: "2006-01-02T15:04:05Z",
},
},
},
},
{
input: `field:>="2006-01-02T15:04:05Z"`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tGREATER,
},
{
typ: tEQUAL,
},
{
typ: tPHRASE,
lval: yySymType{
s: "2006-01-02T15:04:05Z",
},
},
},
},
{
input: `field:<"2006-01-02T15:04:05Z"`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tLESS,
},
{
typ: tPHRASE,
lval: yySymType{
s: "2006-01-02T15:04:05Z",
},
},
},
},
{
input: `field:<="2006-01-02T15:04:05Z"`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "field",
},
},
{
typ: tCOLON,
},
{
typ: tLESS,
},
{
typ: tEQUAL,
},
{
typ: tPHRASE,
lval: yySymType{
s: "2006-01-02T15:04:05Z",
},
},
},
},
{
input: `/mar.*ty/`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `/mar.*ty/`,
},
},
},
},
{
input: `name:/mar.*ty/`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "name",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: `/mar.*ty/`,
},
},
},
},
{
input: `mart*`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `mart*`,
},
},
},
},
{
input: `name:mart*`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "name",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: `mart*`,
},
},
},
},
{
input: `name\:marty`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `name:marty`,
},
},
},
},
{
input: `name:marty\:couchbase`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "name",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: `marty:couchbase`,
},
},
},
},
{
input: `marty\ couchbase`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `marty couchbase`,
},
},
},
},
{
input: `\+marty`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `+marty`,
},
},
},
},
{
input: `\-marty`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `-marty`,
},
},
},
},
{
input: `"what does \"quote\" mean"`,
tokens: []token{
{
typ: tPHRASE,
lval: yySymType{
s: `what does "quote" mean`,
},
},
},
},
{
input: `can\ i\ escap\e`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `can i escap\e`,
},
},
},
},
{
input: ` what`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `what`,
},
},
},
},
{
input: `term^`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `term`,
},
},
{
typ: tBOOST,
lval: yySymType{
s: "1",
},
},
},
},
{
input: `3.0\:`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `3.0:`,
},
},
},
},
{
input: `3.0\a`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: `3.0\a`,
},
},
},
},
{
input: `age:65^10`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "age",
},
},
{
typ: tCOLON,
},
{
typ: tNUMBER,
lval: yySymType{
s: "65",
},
},
{
typ: tBOOST,
lval: yySymType{
s: "10",
},
},
},
},
{
input: `age:65^10 age:18^5`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "age",
},
},
{
typ: tCOLON,
},
{
typ: tNUMBER,
lval: yySymType{
s: "65",
},
},
{
typ: tBOOST,
lval: yySymType{
s: "10",
},
},
{
typ: tSTRING,
lval: yySymType{
s: "age",
},
},
{
typ: tCOLON,
},
{
typ: tNUMBER,
lval: yySymType{
s: "18",
},
},
{
typ: tBOOST,
lval: yySymType{
s: "5",
},
},
},
},
{
input: `age:65~2`,
tokens: []token{
{
typ: tSTRING,
lval: yySymType{
s: "age",
},
},
{
typ: tCOLON,
},
{
typ: tNUMBER,
lval: yySymType{
s: "65",
},
},
{
typ: tTILDE,
lval: yySymType{
s: "2",
},
},
},
},
{
input: `65:cat`,
tokens: []token{
{
typ: tNUMBER,
lval: yySymType{
s: "65",
},
},
{
typ: tCOLON,
},
{
typ: tSTRING,
lval: yySymType{
s: "cat",
},
},
},
},
}
for _, test := range tests {
test := test
t.Run(test.input, func(t *testing.T) {
r := strings.NewReader(test.input)
l := newQueryStringLex(r)
var tokens []token
var lval yySymType
rv := l.Lex(&lval)
for rv > 0 {
//tokenTypes = append(tokenTypes, rv)
tokens = append(tokens, token{typ: rv, lval: lval})
lval.s = ""
lval.n = 0
rv = l.Lex(&lval)
}
if !reflect.DeepEqual(tokens, test.tokens) {
t.Fatalf("\nexpected: %#v\n got: %#v\n", test.tokens, tokens)
}
})
}
}
// token pairs the token type returned by the lexer's Lex method with the
// semantic value Lex filled in, so TestLexer can compare whole token streams
// with reflect.DeepEqual.
type token struct {
typ int // token type, as returned by Lex
lval yySymType // semantic value populated by Lex for this token
}
================================================
FILE: search/query/query_string_parser.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// as of Go 1.8 this requires the goyacc external tool
// available from golang.org/x/tools/cmd/goyacc
//go:generate goyacc -o query_string.y.go query_string.y
//go:generate sed -i.tmp -e 1d query_string.y.go
//go:generate rm query_string.y.go.tmp
// note: OSX sed and gnu sed handle the -i (in-place) option differently.
// using -i.tmp works on both, at the expense of having to remove
// the unsightly .tmp files
package query
import (
"fmt"
"strings"
)
// debugParser is a debugging switch for the parser.
// NOTE(review): it is not referenced in the code visible here; presumably it
// gates trace output in the generated parser — confirm against query_string.y.
var debugParser bool
// debugLexer enables the token trace written by logDebugTokens.
var debugLexer bool
// parseQuerySyntax parses a query-string expression into a Query.
//
// An empty string yields a match-none query. Any other input is lexed and run
// through the generated parser. If errors were recorded along the way they
// are joined with newlines into a single error and the returned Query is nil;
// otherwise the boolean query built by the parser is returned.
func parseQuerySyntax(query string) (rq Query, err error) {
	if len(query) == 0 {
		return NewMatchNoneQuery(), nil
	}

	wrapper := newLexerWrapper(newQueryStringLex(strings.NewReader(query)))
	doParse(wrapper)

	if len(wrapper.errs) == 0 {
		return wrapper.query, nil
	}
	return nil, fmt.Errorf("%s", strings.Join(wrapper.errs, "\n"))
}
// doParse runs the generated parser over lex. A panic raised while parsing is
// recovered and recorded on lex as a "parse error: ..." message instead of
// propagating to the caller.
func doParse(lex *lexerWrapper) {
	defer func() {
		if r := recover(); r != nil {
			lex.Error(fmt.Sprintf("parse error: %v", r))
		}
	}()

	yyParse(lex)
}
// Clause kinds, numbered from zero in declaration order.
// NOTE(review): their use is not visible in this file; presumably the
// generated parser uses them to pick the should / must / must-not clause of
// the boolean query a term is added to — confirm against query_string.y.
const (
queryShould = iota
queryMust
queryMustNot
)
// lexerWrapper wraps a yyLexer for one parse: it forwards Lex calls to the
// underlying lexer, collects error messages, and carries the boolean query
// that parseQuerySyntax returns on success.
type lexerWrapper struct {
lex yyLexer // underlying lexer that Lex delegates to
errs []string // messages recorded via Error (and by doParse after a panic)
query *BooleanQuery // query handed back by parseQuerySyntax when errs is empty
}
// newLexerWrapper returns a lexerWrapper around lex whose query starts out as
// a query-string boolean query with no clauses.
func newLexerWrapper(lex yyLexer) *lexerWrapper {
	w := new(lexerWrapper)
	w.lex = lex
	w.query = NewBooleanQueryForQueryString(nil, nil, nil)
	return w
}
// Lex forwards to the wrapped lexer, which fills in lval and returns the
// token type.
func (l *lexerWrapper) Lex(lval *yySymType) int {
return l.lex.Lex(lval)
}
// Error records s in l.errs; parseQuerySyntax later joins the collected
// messages into the error it returns.
func (l *lexerWrapper) Error(s string) {
l.errs = append(l.errs, s)
}
================================================
FILE: search/query/query_string_parser_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"reflect"
"strings"
"testing"
"time"
"github.com/blevesearch/bleve/v2/mapping"
)
func TestQuerySyntaxParserValid(t *testing.T) {
thirtyThreePointOh := 33.0
twoPointOh := 2.0
fivePointOh := 5.0
minusFivePointOh := -5.0
theTruth := true
theFalsehood := false
theDate, err := time.Parse(time.RFC3339, "2006-01-02T15:04:05Z")
if err != nil {
t.Fatal(err)
}
tests := []struct {
input string
result Query
mapping mapping.IndexMapping
}{
{
input: "test",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery("test"),
},
nil),
},
{
input: "127.0.0.1",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery("127.0.0.1"),
},
nil),
},
{
input: `"test phrase 1"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchPhraseQuery("test phrase 1"),
},
nil),
},
{
input: "field:test",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery("test")
q.SetField("field")
return q
}(),
},
nil),
},
// - is allowed inside a term, just not the start
{
input: "field:t-est",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery("t-est")
q.SetField("field")
return q
}(),
},
nil),
},
// + is allowed inside a term, just not the start
{
input: "field:t+est",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery("t+est")
q.SetField("field")
return q
}(),
},
nil),
},
// > is allowed inside a term, just not the start
{
input: "field:t>est",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery("t>est")
q.SetField("field")
return q
}(),
},
nil),
},
// < is allowed inside a term, just not the start
{
input: "field:t5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(&fivePointOh, nil, &theFalsehood, nil)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:>=5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(&fivePointOh, nil, &theTruth, nil)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:<5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(nil, &fivePointOh, nil, &theFalsehood)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:<=5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(nil, &fivePointOh, nil, &theTruth)
q.SetField("field")
return q
}(),
},
nil),
},
// new range tests with negative number
{
input: "field:-5",
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
qo := NewDisjunctionQuery(
[]Query{
func() Query {
q := NewMatchQuery("-5")
q.SetField("field")
return q
}(),
func() Query {
q := NewNumericRangeInclusiveQuery(&minusFivePointOh, &minusFivePointOh, &theTruth, &theTruth)
q.SetField("field")
return q
}(),
})
qo.queryStringMode = true
return qo
}(),
},
nil),
},
{
input: `field:>-5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(&minusFivePointOh, nil, &theFalsehood, nil)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:>=-5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(&minusFivePointOh, nil, &theTruth, nil)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:<-5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(nil, &minusFivePointOh, nil, &theFalsehood)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:<=-5`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewNumericRangeInclusiveQuery(nil, &minusFivePointOh, nil, &theTruth)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:>"2006-01-02T15:04:05Z"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewDateRangeInclusiveQuery(theDate, time.Time{}, &theFalsehood, nil)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:>="2006-01-02T15:04:05Z"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewDateRangeInclusiveQuery(theDate, time.Time{}, &theTruth, nil)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:<"2006-01-02T15:04:05Z"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewDateRangeInclusiveQuery(time.Time{}, theDate, nil, &theFalsehood)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `field:<="2006-01-02T15:04:05Z"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewDateRangeInclusiveQuery(time.Time{}, theDate, nil, &theTruth)
q.SetField("field")
return q
}(),
},
nil),
},
{
input: `/mar.*ty/`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewRegexpQuery("mar.*ty"),
},
nil),
},
{
input: `name:/mar.*ty/`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewRegexpQuery("mar.*ty")
q.SetField("name")
return q
}(),
},
nil),
},
{
input: `mart*`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewWildcardQuery("mart*"),
},
nil),
},
{
input: `name:mart*`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewWildcardQuery("mart*")
q.SetField("name")
return q
}(),
},
nil),
},
// tests for escaping
// escape : as field delimiter
{
input: `name\:marty`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery("name:marty"),
},
nil),
},
// first colon delimiter, second escaped
{
input: `name:marty\:couchbase`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery("marty:couchbase")
q.SetField("name")
return q
}(),
},
nil),
},
// escape space, single argument to match query
{
input: `marty\ couchbase`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery("marty couchbase"),
},
nil),
},
// escape leading plus, not a must clause
{
input: `\+marty`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery("+marty"),
},
nil),
},
// escape leading minus, not a must not clause
{
input: `\-marty`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery("-marty"),
},
nil),
},
// escape quote inside of phrase
{
input: `"what does \"quote\" mean"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchPhraseQuery(`what does "quote" mean`),
},
nil),
},
// escaping an unsupported character retains backslash
{
input: `can\ i\ escap\e`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery(`can i escap\e`),
},
nil),
},
// leading spaces
{
input: ` what`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery(`what`),
},
nil),
},
// no boost value defaults to 1
{
input: `term^`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery(`term`)
q.SetBoost(1.0)
return q
}(),
},
nil),
},
// weird lexer cases, something that starts like a number
// but contains escape and ends up as string
{
input: `3.0\:`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery(`3.0:`),
},
nil),
},
{
input: `3.0\a`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
NewMatchQuery(`3.0\a`),
},
nil),
},
// field names as phrases
{
input: `"fie ld":test`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchQuery("test")
q.SetField("fie ld")
return q
}(),
},
nil),
},
{
input: `"fie ld":"test"`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewMatchPhraseQuery("test")
q.SetField("fie ld")
return q
}(),
},
nil),
},
// exact match number with boost
{
input: `age:65^10`,
mapping: mapping.NewIndexMapping(),
result: NewBooleanQueryForQueryString(
nil,
[]Query{
func() Query {
q := NewDisjunctionQuery([]Query{
func() Query {
mq := NewMatchQuery("65")
mq.SetField("age")
return mq
}(),
func() Query {
val := float64(65)
inclusive := true
nq := NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
nq.SetField("age")
return nq
}(),
})
q.SetBoost(10)
q.queryStringMode = true
return q
}(),
},
nil),
},
}
// turn on lexer debugging
// debugLexer = true
// debugParser = true
// logger = log.New(os.Stderr, "bleve ", log.LstdFlags)
for _, test := range tests {
q, err := parseQuerySyntax(test.input)
if err != nil {
t.Fatal(err)
}
if !reflect.DeepEqual(q, test.result) {
t.Errorf("Expected %#v, got %#v: for %s", test.result, q, test.input)
}
}
}
func TestQuerySyntaxParserInvalid(t *testing.T) {
tests := []struct {
input string
}{
{"^"},
{"^5"},
{"field:-text"},
{"field:+text"},
{"field:>text"},
{"field:>=text"},
{"field:99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999`},
{`field:>=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999`},
{`field:<99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999`},
{`field:<=99999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999999`},
}
// turn on lexer debugging
// debugLexer = true
// logger = log.New(os.Stderr, "bleve", log.LstdFlags)
for _, test := range tests {
_, err := parseQuerySyntax(test.input)
if err == nil {
t.Errorf("expected error, got nil for `%s`", test.input)
}
}
}
// BenchmarkLexer measures lexing of one short query string that contains an
// operator, a field name and a phrase. The tokens are collected into slices
// each iteration, as a caller of the lexer would.
func BenchmarkLexer(b *testing.B) {
	const input = `+field4:"test phrase 1"`

	for i := 0; i < b.N; i++ {
		var (
			kinds  []int
			values []yySymType
			lval   yySymType
		)
		lexer := newQueryStringLex(strings.NewReader(input))
		for kind := lexer.Lex(&lval); kind > 0; kind = lexer.Lex(&lval) {
			kinds = append(kinds, kind)
			values = append(values, lval)
			// use the slices to silence the compiler warning
			_, _ = kinds, values
			lval.s = ""
			lval.n = 0
		}
	}
}
================================================
FILE: search/query/query_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"reflect"
"sort"
"strings"
"testing"
"time"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/mapping"
)
// Shared fixtures for the query tests in this file.

// numeric range bounds
var minNum = 5.1
var maxNum = 7.1
// term range bounds
var minTerm = "bob"
var maxTerm = "cat"
// date range bounds in RFC 3339 form; init parses them into startDate/endDate
var startDateStr = "2011-01-01T00:00:00Z"
var endDateStr = "2012-01-01T00:00:00Z"
var startDate time.Time
var endDate time.Time
// init parses the RFC 3339 fixture strings into startDate and endDate,
// panicking if either of them is malformed.
func init() {
	mustParse := func(s string) time.Time {
		parsed, err := time.Parse(time.RFC3339, s)
		if err != nil {
			panic(err)
		}
		return parsed
	}

	startDate = mustParse(startDateStr)
	endDate = mustParse(endDateStr)
}
// TestParseQuery decodes JSON query definitions with ParseQuery and compares
// the result with the equivalent query built through the constructors.
//
// Cases with err set must fail to parse (and produce a nil query); all other
// cases must parse without error.
func TestParseQuery(t *testing.T) {
	// matchDesc is the plain match query on the "desc" field used by many cases
	matchDesc := func(text string) Query {
		q := NewMatchQuery(text)
		q.SetField("desc")
		return q
	}

	tests := []struct {
		input  []byte
		output Query
		err    bool
	}{
		{
			input: []byte(`{"term":"water","field":"desc"}`),
			output: func() Query {
				q := NewTermQuery("water")
				q.SetField("desc")
				return q
			}(),
		},
		{
			input:  []byte(`{"match":"beer","field":"desc"}`),
			output: matchDesc("beer"),
		},
		// an explicit "or" operator equals the query built without SetOperator
		{
			input:  []byte(`{"match":"beer","field":"desc","operator":"or"}`),
			output: matchDesc("beer"),
		},
		{
			input: []byte(`{"match":"beer","field":"desc","operator":"and"}`),
			output: func() Query {
				q := NewMatchQuery("beer")
				q.SetOperator(MatchQueryOperatorAnd)
				q.SetField("desc")
				return q
			}(),
		},
		// same as above, with the operator passed through a variable
		{
			input: []byte(`{"match":"beer","field":"desc","operator":"and"}`),
			output: func() Query {
				operator := MatchQueryOperatorAnd
				q := NewMatchQuery("beer")
				q.SetOperator(operator)
				q.SetField("desc")
				return q
			}(),
		},
		{
			input: []byte(`{"match":"beer","field":"desc","operator":"or"}`),
			output: func() Query {
				q := NewMatchQuery("beer")
				q.SetOperator(MatchQueryOperatorOr)
				q.SetField("desc")
				return q
			}(),
		},
		// same as above, with the operator passed through a variable
		{
			input: []byte(`{"match":"beer","field":"desc","operator":"or"}`),
			output: func() Query {
				operator := MatchQueryOperatorOr
				q := NewMatchQuery("beer")
				q.SetOperator(operator)
				q.SetField("desc")
				return q
			}(),
		},
		// unknown operator
		{
			input:  []byte(`{"match":"beer","field":"desc","operator":"does not exist"}`),
			output: nil,
			err:    true,
		},
		{
			input: []byte(`{"match_phrase":"light beer","field":"desc"}`),
			output: func() Query {
				q := NewMatchPhraseQuery("light beer")
				q.SetField("desc")
				return q
			}(),
		},
		{
			input: []byte(`{"must":{"conjuncts": [{"match":"beer","field":"desc"}]},"should":{"disjuncts": [{"match":"water","field":"desc"}],"min":1.0},"must_not":{"disjuncts": [{"match":"devon","field":"desc"}]}}`),
			output: func() Query {
				q := NewBooleanQuery(
					[]Query{matchDesc("beer")},
					[]Query{matchDesc("water")},
					[]Query{matchDesc("devon")})
				q.SetMinShould(1)
				return q
			}(),
		},
		{
			input:  []byte(`{"terms":["watered","down"],"field":"desc"}`),
			output: NewPhraseQuery([]string{"watered", "down"}, "desc"),
		},
		{
			input:  []byte(`{"query":"+beer \"light beer\" -devon"}`),
			output: NewQueryStringQuery(`+beer "light beer" -devon`),
		},
		{
			input: []byte(`{"min":5.1,"max":7.1,"field":"desc"}`),
			output: func() Query {
				q := NewNumericRangeQuery(&minNum, &maxNum)
				q.SetField("desc")
				return q
			}(),
		},
		{
			input: []byte(`{"min":"bob","max":"cat","field":"desc"}`),
			output: func() Query {
				q := NewTermRangeQuery(minTerm, maxTerm)
				q.SetField("desc")
				return q
			}(),
		},
		{
			input: []byte(`{"start":"` + startDateStr + `","end":"` + endDateStr + `","field":"desc"}`),
			output: func() Query {
				q := NewDateRangeStringQuery(startDateStr, endDateStr)
				q.SetField("desc")
				return q
			}(),
		},
		{
			input: []byte(`{"prefix":"budwei","field":"desc"}`),
			output: func() Query {
				q := NewPrefixQuery("budwei")
				q.SetField("desc")
				return q
			}(),
		},
		{
			input:  []byte(`{"match_all":{}}`),
			output: NewMatchAllQuery(),
		},
		{
			input:  []byte(`{"match_none":{}}`),
			output: NewMatchNoneQuery(),
		},
		{
			input:  []byte(`{"ids":["a","b","c"]}`),
			output: NewDocIDQuery([]string{"a", "b", "c"}),
		},
		{
			input:  []byte(`{"bool": true}`),
			output: NewBoolFieldQuery(true),
		},
		{
			input: []byte(`{"field": "x", "cidr": "1.2.3.0/4"}`),
			output: func() Query {
				q := NewIPRangeQuery("1.2.3.0/4")
				q.SetField("x")
				return q
			}(),
		},
		// unknown query type
		{
			input:  []byte(`{"madeitup":"queryhere"}`),
			output: nil,
			err:    true,
		},
	}

	for i, test := range tests {
		actual, err := ParseQuery(test.input)
		switch {
		case err != nil && !test.err:
			t.Errorf("error %v for %d", err, i)
		case err == nil && test.err:
			// without this a parser that silently accepts bad input would
			// only be caught by the output comparison below
			t.Errorf("expected error for %d (%s), got nil", i, string(test.input))
		}
		if !reflect.DeepEqual(test.output, actual) {
			t.Errorf("expected: %#v, got: %#v for %s", test.output, actual, string(test.input))
		}
	}
}
// TestQueryValidate runs Validate on a range of queries and checks that an
// error is reported exactly for the cases marked with err. Queries that do
// not implement ValidatableQuery are skipped.
func TestQueryValidate(t *testing.T) {
	// descMatch is the match query on the "desc" field shared by many cases
	descMatch := func(text string) Query {
		mq := NewMatchQuery(text)
		mq.SetField("desc")
		return mq
	}

	type validateCase struct {
		query Query
		err   bool
	}

	tests := []validateCase{
		{
			query: func() Query {
				tq := NewTermQuery("water")
				tq.SetField("desc")
				return tq
			}(),
		},
		{query: descMatch("beer")},
		{
			query: func() Query {
				pq := NewMatchPhraseQuery("light beer")
				pq.SetField("desc")
				return pq
			}(),
		},
		{
			query: func() Query {
				nq := NewNumericRangeQuery(&minNum, &maxNum)
				nq.SetField("desc")
				return nq
			}(),
		},
		// a numeric range needs at least one bound
		{
			query: func() Query {
				nq := NewNumericRangeQuery(nil, nil)
				nq.SetField("desc")
				return nq
			}(),
			err: true,
		},
		{
			query: func() Query {
				dq := NewDateRangeQuery(startDate, endDate)
				dq.SetField("desc")
				return dq
			}(),
		},
		{
			query: func() Query {
				pq := NewPrefixQuery("budwei")
				pq.SetField("desc")
				return pq
			}(),
		},
		{query: NewQueryStringQuery(`+beer "light beer" -devon`)},
		{query: NewPhraseQuery([]string{"watered", "down"}, "desc")},
		// a phrase needs at least one term
		{query: NewPhraseQuery([]string{}, "field"), err: true},
		{
			query: func() Query {
				none := NewMatchNoneQuery()
				none.SetBoost(25)
				return none
			}(),
		},
		{
			query: func() Query {
				all := NewMatchAllQuery()
				all.SetBoost(25)
				return all
			}(),
		},
		{
			query: NewBooleanQuery(
				[]Query{descMatch("beer")},
				[]Query{descMatch("water")},
				[]Query{descMatch("devon")}),
		},
		// must-not only, with nil and with empty must/should
		{
			query: NewBooleanQuery(
				nil,
				nil,
				[]Query{descMatch("devon")}),
		},
		{
			query: NewBooleanQuery(
				[]Query{},
				[]Query{},
				[]Query{descMatch("devon")}),
		},
		// no clauses at all
		{
			query: NewBooleanQuery(
				nil,
				nil,
				nil),
			err: true,
		},
		{
			query: NewBooleanQuery(
				[]Query{},
				[]Query{},
				[]Query{}),
			err: true,
		},
		// min-should larger than the number of should clauses
		{
			query: func() Query {
				bq := NewBooleanQuery(
					[]Query{descMatch("beer")},
					[]Query{descMatch("water")},
					[]Query{descMatch("devon")})
				bq.SetMinShould(2)
				return bq
			}(),
			err: true,
		},
		{
			query: func() Query {
				ids := NewDocIDQuery(nil)
				ids.SetBoost(25)
				return ids
			}(),
		},
	}

	for _, test := range tests {
		vq, ok := test.query.(ValidatableQuery)
		if !ok {
			continue
		}
		actual := vq.Validate()
		switch {
		case actual != nil && !test.err:
			t.Errorf("expected no error: %#v got %#v", test.err, actual)
		case actual == nil && test.err:
			t.Errorf("expected error: %#v got %#v", test.err, actual)
		}
	}
}
// TestDumpQuery dumps the query-string query "+water -light beer" with
// DumpQuery and compares the output, after trimming surrounding whitespace,
// with the expected JSON: "water" as a must clause, "beer" as a should clause
// and "light" as a must-not clause.
func TestDumpQuery(t *testing.T) {
mapping := mapping.NewIndexMapping()
q := NewQueryStringQuery("+water -light beer")
s, err := DumpQuery(mapping, q)
if err != nil {
t.Fatal(err)
}
s = strings.TrimSpace(s)
// NOTE(review): the comparison below is byte-for-byte, so whitespace inside
// this literal matters — confirm its indentation matches what DumpQuery emits.
wanted := strings.TrimSpace(`{
"must": {
"conjuncts": [
{
"match": "water",
"prefix_length": 0,
"fuzziness": 0
}
]
},
"should": {
"disjuncts": [
{
"match": "beer",
"prefix_length": 0,
"fuzziness": 0
}
],
"min": 0
},
"must_not": {
"disjuncts": [
{
"match": "light",
"prefix_length": 0,
"fuzziness": 0
}
],
"min": 0
}
}`)
if wanted != s {
t.Fatalf("query:\n%s\ndiffers from expected:\n%s", s, wanted)
}
}
// TestGeoShapeQuery parses JSON geo shape queries with ParseQuery and compares
// each result, via reflect.DeepEqual, against a query built programmatically
// with NewGeoShapeQuery, NewGeoShapeCircleQuery or NewGeometryCollectionQuery.
// The table covers polygon, multipolygon, point, multipoint, linestring,
// multilinestring, envelope, circle and geometrycollection shapes.
func TestGeoShapeQuery(t *testing.T) {
tests := []struct {
input []byte
output Query
err bool
}{
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "polygon",
"coordinates": [[
[
74.1357421875,
30.600093873550072
],
[
67.0166015625,
21.57571893245848
],
[
68.8623046875,
9.145486056167277
],
[
83.1884765625,
4.083452772038619
],
[
88.9892578125,
22.67484735118852
],
[
74.1357421875,
30.600093873550072
]]]
},
"relation": "intersects"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{{{{74.1357421875, 30.600093873550072},
{67.0166015625, 21.57571893245848}, {68.8623046875, 9.145486056167277},
{83.1884765625, 4.083452772038619}, {88.9892578125, 22.67484735118852},
{74.1357421875, 30.600093873550072}}}}, geo.PolygonType, "intersects")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "multipolygon",
"coordinates": [
[[
[
77.58268117904663,
12.980513152175025
],
[
77.58147954940794,
12.977983107483992
],
[
77.58708000183104,
12.97886130773254
],
[
77.58268117904663,
12.980513152175025
]
]],
[[
[
77.5864577293396,
12.97762764459667
],
[
77.58879661560059,
12.975076660730531
],
[
77.59115695953369,
12.979216768855913
],
[
77.5864577293396,
12.97762764459667
]
]]
]
},
"relation": "contains"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{
{{{77.58268117904663, 12.980513152175025},
{77.58147954940794, 12.977983107483992}, {77.58708000183104, 12.97886130773254},
{77.58268117904663, 12.980513152175025}}},
{{{77.5864577293396, 12.97762764459667}, {77.58879661560059, 12.975076660730531},
{77.59115695953369, 12.979216768855913}, {77.5864577293396, 12.97762764459667}}}},
geo.MultiPolygonType, "contains")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "point",
"coordinates": [77.58268117904663, 12.980513152175025]
},
"relation": "contains"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{
{{{77.58268117904663, 12.980513152175025}}}},
geo.PointType, "contains")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "multipoint",
"coordinates": [[77.58268117904663, 12.980513152175025],
[77.5864577293396, 12.97762764459667]]
},
"relation": "intersects"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{
{{{77.58268117904663, 12.980513152175025},
{77.5864577293396, 12.97762764459667}}}},
geo.MultiPointType, "intersects")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "linestring",
"coordinates": [[77.58268117904663, 12.980513152175025],
[77.5864577293396, 12.97762764459667]]
},
"relation": "intersects"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{
{{{77.58268117904663, 12.980513152175025},
{77.5864577293396, 12.97762764459667}}}},
geo.LineStringType, "intersects")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "multilinestring",
"coordinates": [
[[77.58268117904663, 12.980513152175025],
[77.5864577293396, 12.97762764459667]],
[[77.5864577293396,12.97762764459667],
[77.58879661560059, 12.975076660730531]]]
},
"relation": "intersects"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{{
{{77.58268117904663, 12.980513152175025},
{77.5864577293396, 12.97762764459667}},
{{77.5864577293396, 12.97762764459667},
{77.58879661560059, 12.975076660730531}}}},
geo.MultiLineStringType, "intersects")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "envelope",
"coordinates": [[77.58268117904663, 12.980513152175025],
[77.5864577293396, 12.97762764459667]]
},
"relation": "within"
}}`),
output: func() Query {
q, _ := NewGeoShapeQuery([][][][]float64{{
{{77.58268117904663, 12.980513152175025},
{77.5864577293396, 12.97762764459667}}}},
geo.EnvelopeType, "within")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "circle",
"coordinates": [77.58268117904663, 12.980513152175025],
"radius": "100m"
},
"relation": "within"
}}`),
output: func() Query {
q, _ := NewGeoShapeCircleQuery([]float64{
77.58268117904663, 12.980513152175025},
"100m", "within")
q.SetField("region")
return q
}(),
},
{
input: []byte(`{
"field" : "region",
"geometry": {
"shape": {
"type": "geometrycollection",
"geometries": [
{
"type": "point",
"coordinates": [
77.59158611297607,
12.972002899506203
]
},
{
"type": "linestring",
"coordinates": [
[
77.58851766586304,
12.973152950670608
],
[
77.58937597274779,
12.972212000113458
]
]
},
{
"type": "polygon",
"coordinates": [
[
[
77.59055614471436,
12.974721193688106
],
[
77.58954763412476,
12.97350841995465
],
[
77.59141445159912,
12.973382960265356
],
[
77.59055614471436,
12.974721193688106
]
]
]
}
]
},
"relation": "contains"
}}`),
output: func() Query {
q, _ := NewGeometryCollectionQuery([][][][][]float64{
{{{{77.59158611297607, 12.972002899506203}}}},
{{{{77.58851766586304, 12.973152950670608}, {77.58937597274779, 12.972212000113458}}}},
{{{{77.59055614471436, 12.974721193688106}, {77.58954763412476, 12.97350841995465},
{77.59141445159912, 12.973382960265356}, {77.59055614471436, 12.974721193688106}}}},
},
[]string{"point", "linestring", "polygon"}, "contains")
q.SetField("region")
return q
}(),
},
}
// Parse every input; a parse error is reported unless the case expects one,
// and the parsed query is always compared against the expected query.
// NOTE(review): no case sets err, and "expected an error but got none" is
// never checked.
for i, test := range tests {
actual, err := ParseQuery(test.input)
if err != nil && test.err == false {
t.Errorf("error %v for %d", err, i)
}
if !reflect.DeepEqual(test.output, actual) {
t.Errorf("expected: %#v, got: %#v for %s", test.output, actual, string(test.input))
}
}
}
// TestParseEmptyQuery verifies that both a nil byte slice and an empty JSON
// object parse, without error, to the query returned by NewMatchNoneQuery.
func TestParseEmptyQuery(t *testing.T) {
	// Case 1: no bytes at all.
	parsed, err := ParseQuery(nil)
	if err != nil {
		t.Fatal(err)
	}
	if want := NewMatchNoneQuery(); !reflect.DeepEqual(parsed, want) {
		t.Errorf("[1] Expected %#v, got %#v", want, parsed)
	}

	// Case 2: an empty JSON object.
	parsed, err = ParseQuery([]byte(`{}`))
	if err != nil {
		t.Fatal(err)
	}
	if want := NewMatchNoneQuery(); !reflect.DeepEqual(parsed, want) {
		t.Errorf("[2] Expected %#v, got %#v", want, parsed)
	}
}
// TestExtractFields parses each JSON query with ParseQuery, runs ExtractFields
// against a default index mapping, and checks that the set of returned field
// names equals expFields. Both sides are sorted before comparison, so the
// order in which fields are listed or returned does not matter.
func TestExtractFields(t *testing.T) {
testQueries := []struct {
query string
expFields []string
}{
{
query: `{"term":"water","field":"desc"}`,
expFields: []string{"desc"},
},
{
query: `{
"must": {
"conjuncts": [
{
"match": "water",
"prefix_length": 0,
"fuzziness": 0
}
]
},
"should": {
"disjuncts": [
{
"match": "beer",
"prefix_length": 0,
"fuzziness": 0
}
],
"min": 0
},
"must_not": {
"disjuncts": [
{
"match": "light",
"prefix_length": 0,
"fuzziness": 0
}
],
"min": 0
}
}`,
expFields: []string{"_all"},
},
{
query: `{
"must": {
"conjuncts": [
{
"match": "water",
"prefix_length": 0,
"field": "desc",
"fuzziness": 0
}
]
},
"should": {
"disjuncts": [
{
"match": "beer",
"prefix_length": 0,
"field": "desc",
"fuzziness": 0
}
],
"min": 0
},
"must_not": {
"disjuncts": [
{
"match": "light",
"prefix_length": 0,
"field": "genre",
"fuzziness": 0
}
],
"min": 0
}
}`,
expFields: []string{"desc", "genre"},
},
{
query: `
{
"conjuncts": [
{
"conjuncts": [
{
"conjuncts": [
{
"conjuncts": [
{
"field": "date",
"start": "2002-09-05T08:09:00Z",
"end": "2007-03-01T03:52:00Z",
"inclusive_start": true,
"inclusive_end": true
},
{
"field": "number",
"min": 1260295,
"max": 3917314,
"inclusive_min": true,
"inclusive_max": true
}
]
},
{
"conjuncts": [
{
"field": "date2",
"start": "2004-08-21T18:30:00Z",
"end": "2006-03-24T08:08:00Z",
"inclusive_start": true,
"inclusive_end": true
},
{
"field": "number",
"min": 165449,
"max": 3847517,
"inclusive_min": true,
"inclusive_max": true
}
]
}
]
},
{
"conjuncts": [
{
"conjuncts": [
{
"field": "date",
"start": "2004-09-02T22:15:00Z",
"end": "2008-06-22T15:06:00Z",
"inclusive_start": true,
"inclusive_end": true
},
{
"field": "number2",
"min": 876843,
"max": 3363351,
"inclusive_min": true,
"inclusive_max": true
}
]
},
{
"conjuncts": [
{
"field": "date",
"start": "2000-12-03T21:35:00Z",
"end": "2008-02-07T05:00:00Z",
"inclusive_start": true,
"inclusive_end": true
},
{
"field": "number",
"min": 2021479,
"max": 4763404,
"inclusive_min": true,
"inclusive_max": true
}
]
}
]
}
]
},
{
"conjuncts": [
{
"conjuncts": [
{
"field": "date3",
"start": "2000-03-13T07:13:00Z",
"end": "2005-09-19T09:33:00Z",
"inclusive_start": true,
"inclusive_end": true
},
{
"field": "number",
"min": 883125,
"max": 4817433,
"inclusive_min": true,
"inclusive_max": true
}
]
},
{
"conjuncts": [
{
"field": "date",
"start": "2002-08-10T22:42:00Z",
"end": "2008-02-10T23:19:00Z",
"inclusive_start": true,
"inclusive_end": true
},
{
"field": "number",
"min": 896115,
"max": 3897074,
"inclusive_min": true,
"inclusive_max": true
}
]
}
]
}
]
}`,
expFields: []string{"date", "number", "date2", "number2", "date3"},
},
{
query: `{
"query" : "hardworking people"
}`,
expFields: []string{"_all"},
},
{
query: `{
"query" : "text:hardworking people"
}`,
expFields: []string{"text", "_all"},
},
{
query: `{
"query" : "text:\"hardworking people\""
}`,
expFields: []string{"text"},
},
{
query: `{
"match_all": {}
}`,
expFields: []string{"_id"},
},
{
query: `{
"ids": ["a", "b", "c"]
}`,
expFields: []string{"_id"},
},
}
m := mapping.NewIndexMapping()
for i, test := range testQueries {
q, err := ParseQuery([]byte(test.query))
if err != nil {
t.Fatal(err)
}
fields, err := ExtractFields(q, m, nil)
if err != nil {
t.Fatal(err)
}
// ExtractFields returns a map-like set; collect its keys into a slice.
var fieldsSlice []string
for k := range fields {
fieldsSlice = append(fieldsSlice, k)
}
// Sort both sides so the comparison is independent of iteration order.
sort.Strings(test.expFields)
sort.Strings(fieldsSlice)
if !reflect.DeepEqual(fieldsSlice, test.expFields) {
t.Errorf("Test %d: expected %v, got %v", i, test.expFields, fieldsSlice)
}
}
}
================================================
FILE: search/query/regexp.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"strings"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// RegexpQuery is a query that matches terms against a regular expression.
// Its Searcher method hands the pattern to a regexp string searcher.
type RegexpQuery struct {
// Regexp is the pattern; Searcher strips a single leading "^" before use.
Regexp string `json:"regexp"`
// FieldVal names the field to search; when empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; it stays nil until SetBoost is called.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewRegexpQuery returns a Query that finds documents containing
// terms which match the given regular expression.
//
// The pattern SHOULD NOT include the ^ or $ anchors: the search
// only ever matches entire terms, even without them.
func NewRegexpQuery(regexp string) *RegexpQuery {
	rq := new(RegexpQuery)
	rq.Regexp = regexp
	return rq
}
// SetBoost stores b as this query's boost, replacing any previous value.
func (q *RegexpQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost reports the query's boost as computed by BoostVal.Value.
func (q *RegexpQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query searches.
func (q *RegexpQuery) SetField(f string) {
	target := q
	target.FieldVal = f
}
// Field returns the configured field name, which is empty when none was set.
func (q *RegexpQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the regexp string searcher for this query. When no field
// has been set, the mapping's default search field is used.
func (q *RegexpQuery) Searcher(
	ctx context.Context,
	i index.IndexReader,
	m mapping.IndexMapping,
	options search.SearcherOptions,
) (search.Searcher, error) {
	targetField := q.FieldVal
	if targetField == "" {
		targetField = m.DefaultSearchField()
	}

	// The pattern must NOT be anchored to the start and end of the term, so
	// a leading ^ is dropped if present. A trailing $ is deliberately left
	// alone: it is not known to interfere with LiteralPrefix() the way ^
	// does, and stripping it would be ambiguous with escaped forms such as
	// \$ or \\$.
	pattern := strings.TrimPrefix(q.Regexp, "^")

	return searcher.NewRegexpStringSearcher(ctx, i, pattern, targetField, q.BoostVal.Value(), options)
}
// Validate always succeeds: nothing is checked here, and real validation
// is deferred to the searcher constructor.
func (q *RegexpQuery) Validate() error {
	var err error
	return err
}
================================================
FILE: search/query/term.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// TermQuery is a query for a single exact term; its Searcher method passes
// Term to a term searcher.
type TermQuery struct {
// Term is the term handed to the term searcher as-is.
Term string `json:"term"`
// FieldVal names the field to search; when empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; it stays nil until SetBoost is called.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermQuery returns a Query that looks for an exact
// match of term in the index.
func NewTermQuery(term string) *TermQuery {
	tq := new(TermQuery)
	tq.Term = term
	return tq
}
// SetBoost stores b as this query's boost, replacing any previous value.
func (q *TermQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost reports the query's boost as computed by BoostVal.Value.
func (q *TermQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query searches.
func (q *TermQuery) SetField(f string) {
	target := q
	target.FieldVal = f
}
// Field returns the configured field name, which is empty when none was set.
func (q *TermQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the term searcher for this query. When no field has been
// set, the mapping's default search field is used.
func (q *TermQuery) Searcher(
	ctx context.Context,
	i index.IndexReader,
	m mapping.IndexMapping,
	options search.SearcherOptions,
) (search.Searcher, error) {
	targetField := q.FieldVal
	if targetField == "" {
		targetField = m.DefaultSearchField()
	}
	boost := q.BoostVal.Value()
	return searcher.NewTermSearcher(ctx, i, q.Term, targetField, boost, options)
}
================================================
FILE: search/query/term_range.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// TermRangeQuery is a query over a range of text terms bounded by Min and Max.
// Validate rejects a query in which both bounds are empty.
type TermRangeQuery struct {
// Min is the lower bound; an empty string means no lower bound (Searcher
// passes a nil term in that case).
Min string `json:"min,omitempty"`
// Max is the upper bound; an empty string means no upper bound (Searcher
// passes a nil term in that case).
Max string `json:"max,omitempty"`
// InclusiveMin is forwarded unchanged to the term range searcher; it may be
// nil. NOTE(review): the meaning of nil is decided by the searcher — confirm.
InclusiveMin *bool `json:"inclusive_min,omitempty"`
// InclusiveMax is forwarded unchanged to the term range searcher; it may be
// nil. NOTE(review): the meaning of nil is decided by the searcher — confirm.
InclusiveMax *bool `json:"inclusive_max,omitempty"`
// FieldVal names the field to search; when empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; it stays nil until SetBoost is called.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewTermRangeQuery returns a Query over a range of text term values.
// Either endpoint, but not both, may be left empty.
// No explicit inclusion flags are supplied (both are nil), which is
// documented as: the minimum value is inclusive and the maximum value
// is exclusive.
func NewTermRangeQuery(min, max string) *TermRangeQuery {
	var (
		inclusiveMin *bool // nil: no explicit choice for the lower bound
		inclusiveMax *bool // nil: no explicit choice for the upper bound
	)
	return NewTermRangeInclusiveQuery(min, max, inclusiveMin, inclusiveMax)
}
// NewTermRangeInclusiveQuery returns a Query over a range of text term
// values. Either endpoint, but not both, may be left empty.
// minInclusive and maxInclusive control whether each endpoint is part of
// the range; they are stored as given and may be nil.
func NewTermRangeInclusiveQuery(min, max string, minInclusive, maxInclusive *bool) *TermRangeQuery {
	rq := new(TermRangeQuery)
	rq.Min, rq.Max = min, max
	rq.InclusiveMin, rq.InclusiveMax = minInclusive, maxInclusive
	return rq
}
// SetBoost stores b as this query's boost, replacing any previous value.
func (q *TermRangeQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost reports the query's boost as computed by BoostVal.Value.
func (q *TermRangeQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query searches.
func (q *TermRangeQuery) SetField(f string) {
	target := q
	target.FieldVal = f
}
// Field returns the configured field name, which is empty when none was set.
func (q *TermRangeQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher builds the term range searcher for this query. When no field has
// been set, the mapping's default search field is used. An empty Min or Max
// is passed on as a nil term rather than an empty byte slice.
func (q *TermRangeQuery) Searcher(
	ctx context.Context,
	i index.IndexReader,
	m mapping.IndexMapping,
	options search.SearcherOptions,
) (search.Searcher, error) {
	targetField := q.FieldVal
	if targetField == "" {
		targetField = m.DefaultSearchField()
	}

	// Both bounds start out nil and are only filled in when specified.
	var lower, upper []byte
	if len(q.Min) > 0 {
		lower = []byte(q.Min)
	}
	if len(q.Max) > 0 {
		upper = []byte(q.Max)
	}

	return searcher.NewTermRangeSearcher(
		ctx, i, lower, upper, q.InclusiveMin, q.InclusiveMax,
		targetField, q.BoostVal.Value(), options)
}
// Validate returns an error when neither Min nor Max is specified, and nil
// otherwise.
func (q *TermRangeQuery) Validate() error {
	hasBound := q.Min != "" || q.Max != ""
	if hasBound {
		return nil
	}
	return fmt.Errorf("term range query must specify min or max")
}
================================================
FILE: search/query/wildcard.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package query
import (
"context"
"strings"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/searcher"
index "github.com/blevesearch/bleve_index_api"
)
// wildcardRegexpReplacer rewrites a wildcard pattern into a regular
// expression: regexp metacharacters are backslash-escaped so they match
// literally, '*' becomes ".*" and '?' becomes ".".
var wildcardRegexpReplacer = func() *strings.Replacer {
	// characters in the wildcard that must be escaped in the regexp
	escaped := []string{"+", "(", ")", "^", "$", ".", "{", "}", "[", "]", `|`, `\`}

	pairs := make([]string, 0, 2*len(escaped)+4)
	for _, ch := range escaped {
		pairs = append(pairs, ch, `\`+ch)
	}
	// wildcard characters
	pairs = append(pairs,
		"*", ".*",
		"?", ".")

	return strings.NewReplacer(pairs...)
}()
// WildcardQuery is a query that matches terms against a wildcard pattern.
// Its Searcher method converts the pattern to a regular expression with
// wildcardRegexpReplacer and searches with a regexp string searcher.
type WildcardQuery struct {
// Wildcard is the wildcard pattern ('*' and '?' are the wildcard characters).
Wildcard string `json:"wildcard"`
// FieldVal names the field to search; when empty, Searcher falls back to
// the mapping's default search field.
FieldVal string `json:"field,omitempty"`
// BoostVal is the optional boost; it stays nil until SetBoost is called.
BoostVal *Boost `json:"boost,omitempty"`
}
// NewWildcardQuery returns a Query that finds documents containing
// terms which match the given wildcard pattern. Within the pattern,
// '*' matches any sequence of 0 or more characters and '?' matches
// any single character.
func NewWildcardQuery(wildcard string) *WildcardQuery {
	wq := new(WildcardQuery)
	wq.Wildcard = wildcard
	return wq
}
// SetBoost stores b as this query's boost, replacing any previous value.
func (q *WildcardQuery) SetBoost(b float64) {
	q.BoostVal = new(Boost)
	*q.BoostVal = Boost(b)
}
// Boost reports the query's boost as computed by BoostVal.Value.
func (q *WildcardQuery) Boost() float64 {
	boost := q.BoostVal
	return boost.Value()
}
// SetField sets the name of the field this query searches.
func (q *WildcardQuery) SetField(f string) {
	target := q
	target.FieldVal = f
}
// Field returns the configured field name, which is empty when none was set.
func (q *WildcardQuery) Field() string {
	name := q.FieldVal
	return name
}
// Searcher translates the wildcard pattern into a regular expression and
// builds a regexp string searcher for it. When no field has been set, the
// mapping's default search field is used.
func (q *WildcardQuery) Searcher(
	ctx context.Context,
	i index.IndexReader,
	m mapping.IndexMapping,
	options search.SearcherOptions,
) (search.Searcher, error) {
	targetField := q.FieldVal
	if targetField == "" {
		targetField = m.DefaultSearchField()
	}

	pattern := wildcardRegexpReplacer.Replace(q.Wildcard)
	boost := q.BoostVal.Value()
	return searcher.NewRegexpStringSearcher(ctx, i, pattern, targetField, boost, options)
}
// Validate always succeeds: nothing is checked here, and real validation
// is deferred to the searcher constructor.
func (q *WildcardQuery) Validate() error {
	var err error
	return err
}
================================================
FILE: search/scorer/scorer_conjunction.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeConjunctionQueryScorer holds the size in bytes of a
// ConjunctionQueryScorer value as reported by reflection; it is set once
// at package initialization.
var reflectStaticSizeConjunctionQueryScorer int

func init() {
	scorerType := reflect.TypeOf(ConjunctionQueryScorer{})
	reflectStaticSizeConjunctionQueryScorer = int(scorerType.Size())
}
// ConjunctionQueryScorer combines the matches of a conjunction's constituents
// into one document match whose score is the sum of their scores.
type ConjunctionQueryScorer struct {
// options is consulted for its Explain flag when building explanations.
options search.SearcherOptions
}
// Size returns the scorer's size estimate in bytes: the static struct size
// plus the size of a pointer.
func (s *ConjunctionQueryScorer) Size() int {
	total := reflectStaticSizeConjunctionQueryScorer
	total += size.SizeOfPtr
	return total
}
// NewConjunctionQueryScorer returns a ConjunctionQueryScorer configured with
// the given searcher options.
func NewConjunctionQueryScorer(options search.SearcherOptions) *ConjunctionQueryScorer {
	cqs := new(ConjunctionQueryScorer)
	cqs.options = options
	return cqs
}
// Score folds the constituent matches into a single match whose score is the
// sum of the constituent scores. With Explain enabled, the result carries a
// "sum of:" explanation whose children are the constituents' explanations;
// otherwise its explanation is nil.
//
// The first constituent is reused as the return value, so constituents must
// hold at least one match. The field term locations of the remaining
// constituents are merged into it via search.MergeFieldTermLocations.
func (s *ConjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch) *search.DocumentMatch {
	explain := s.options.Explain

	var total float64
	var expl *search.Explanation
	if explain {
		expl = &search.Explanation{
			Message:  "sum of:",
			Children: make([]*search.Explanation, len(constituents)),
		}
	}
	for idx := range constituents {
		match := constituents[idx]
		total += match.Score
		if explain {
			expl.Children[idx] = match.Expl
		}
	}
	if explain {
		expl.Value = total
	}

	// reuse constituents[0] as the return value
	first := constituents[0]
	first.Score = total
	first.Expl = expl
	first.FieldTermLocations = search.MergeFieldTermLocations(
		first.FieldTermLocations, constituents[1:])
	return first
}
================================================
FILE: search/scorer/scorer_constant.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeConstantScorer holds the size in bytes of a ConstantScorer
// value as reported by reflection; it is set once at package initialization.
var reflectStaticSizeConstantScorer int

func init() {
	scorerType := reflect.TypeOf(ConstantScorer{})
	reflectStaticSizeConstantScorer = int(scorerType.Size())
}
// ConstantScorer gives every document the same base score, optionally scaled
// by a query weight derived from the boost and the query norm.
type ConstantScorer struct {
// constant is the base score assigned to each document.
constant float64
// boost feeds Weight (boost squared) and the query weight.
boost float64
// options supplies the Explain flag; its Score setting fixes includeScore.
options search.SearcherOptions
// queryNorm is the value most recently passed to SetQueryNorm.
queryNorm float64
// queryWeight starts at 1.0 and becomes boost*queryNorm in SetQueryNorm.
queryWeight float64
// queryWeightExplanation is built by SetQueryNorm when Explain is enabled.
queryWeightExplanation *search.Explanation
// includeScore is false when options.Score is "none"; Score then leaves the
// match's score and explanation untouched.
includeScore bool
}
// Size returns the scorer's size estimate in bytes: the static struct size
// plus a pointer, plus the size of the query weight explanation if one has
// been built.
func (s *ConstantScorer) Size() int {
	total := reflectStaticSizeConstantScorer + size.SizeOfPtr
	if expl := s.queryWeightExplanation; expl != nil {
		total += expl.Size()
	}
	return total
}
// NewConstantScorer returns a ConstantScorer that assigns constant to every
// document. The query weight starts at 1.0, and scoring is disabled entirely
// when options.Score is "none".
func NewConstantScorer(constant float64, boost float64, options search.SearcherOptions) *ConstantScorer {
	wantScore := options.Score != "none"
	return &ConstantScorer{
		constant:     constant,
		boost:        boost,
		options:      options,
		queryWeight:  1.0,
		includeScore: wantScore,
	}
}
// Weight returns the square of the scorer's boost.
func (s *ConstantScorer) Weight() float64 {
	return s.boost * s.boost
}
// SetQueryNorm records the query norm and recomputes the query weight as
// boost * qnorm. With Explain enabled it also rebuilds the query weight
// explanation from those two factors.
func (s *ConstantScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm

	// update the query weight
	s.queryWeight = s.boost * qnorm

	if !s.options.Explain {
		return
	}
	factors := []*search.Explanation{
		{Value: s.boost, Message: "boost"},
		{Value: qnorm, Message: "queryNorm"},
	}
	s.queryWeightExplanation = &search.Explanation{
		Value:    s.queryWeight,
		Message:  fmt.Sprintf("ConstantScore()^%f, product of:", s.boost),
		Children: factors,
	}
}
// Score takes a DocumentMatch from the context's pool, tags it with id and,
// unless scoring is disabled, sets its score to the constant multiplied by
// the query weight (the multiplication is skipped when the weight is 1).
// With Explain enabled, the match also receives an explanation of how the
// score was derived.
func (s *ConstantScorer) Score(ctx *search.SearchContext, id index.IndexInternalID) *search.DocumentMatch {
	match := ctx.DocumentMatchPool.Get()
	match.IndexInternalID = id
	if !s.includeScore {
		return match
	}

	explain := s.options.Explain
	value := s.constant

	var expl *search.Explanation
	if explain {
		expl = &search.Explanation{Value: value, Message: "ConstantScore()"}
	}

	// if the query weight isn't 1, multiply
	if s.queryWeight != 1.0 {
		value = value * s.queryWeight
		if explain {
			expl = &search.Explanation{
				Value:    value,
				Message:  fmt.Sprintf("weight(^%f), product of:", s.boost),
				Children: []*search.Explanation{s.queryWeightExplanation, expl},
			}
		}
	}

	match.Score = value
	if explain {
		match.Expl = expl
	}
	return match
}
================================================
FILE: search/scorer/scorer_constant_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
func TestConstantScorer(t *testing.T) {
scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
// test some simple math
{
termMatch: &index.TermFieldDoc{
ID: index.IndexInternalID("one"),
Freq: 1,
Norm: 1.0,
Vectors: []*index.TermFieldVector{
{
Field: "desc",
Pos: 1,
Start: 0,
End: 4,
},
},
},
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: 1.0,
Expl: &search.Explanation{
Value: 1.0,
Message: "ConstantScore()",
},
Sort: []string{},
},
},
}
for _, test := range tests {
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch.ID)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}
func TestConstantScorerWithQueryNorm(t *testing.T) {
scorer := NewConstantScorer(1, 1, search.SearcherOptions{Explain: true})
scorer.SetQueryNorm(2.0)
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
{
termMatch: &index.TermFieldDoc{
ID: index.IndexInternalID("one"),
Freq: 1,
Norm: 1.0,
},
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: 2.0,
Sort: []string{},
Expl: &search.Explanation{
Value: 2.0,
Message: "weight(^1.000000), product of:",
Children: []*search.Explanation{
{
Value: 2.0,
Message: "ConstantScore()^1.000000, product of:",
Children: []*search.Explanation{
{
Value: 1,
Message: "boost",
},
{
Value: 2,
Message: "queryNorm",
},
},
},
{
Value: 1.0,
Message: "ConstantScore()",
},
},
},
},
},
}
for _, test := range tests {
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch.ID)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}
================================================
FILE: search/scorer/scorer_disjunction.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
)
// reflectStaticSizeDisjunctionQueryScorer holds the size in bytes of a
// DisjunctionQueryScorer value as reported by reflection; it is set once
// at package initialization.
var reflectStaticSizeDisjunctionQueryScorer int

func init() {
	scorerType := reflect.TypeOf(DisjunctionQueryScorer{})
	reflectStaticSizeDisjunctionQueryScorer = int(scorerType.Size())
}
// DisjunctionQueryScorer combines the matches of a disjunction's constituents
// into one document match, scaling the summed score by the fraction of
// clauses that matched.
type DisjunctionQueryScorer struct {
// options is consulted for its Explain flag when building explanations.
options search.SearcherOptions
}
// Size returns the scorer's size estimate in bytes: the static struct size
// plus the size of a pointer.
func (s *DisjunctionQueryScorer) Size() int {
	total := reflectStaticSizeDisjunctionQueryScorer
	total += size.SizeOfPtr
	return total
}
// NewDisjunctionQueryScorer returns a DisjunctionQueryScorer configured with
// the given searcher options.
func NewDisjunctionQueryScorer(options search.SearcherOptions) *DisjunctionQueryScorer {
	dqs := new(DisjunctionQueryScorer)
	dqs.options = options
	return dqs
}
// Score folds the constituent matches into a single match. The score is the
// sum of the constituent scores multiplied by a coordination factor of
// countMatch/countTotal. With Explain enabled, the result carries a
// "product of:" explanation made of the raw sum and the coord factor, and is
// flagged as a partial match when countMatch differs from countTotal.
//
// The first constituent is reused as the return value, so constituents must
// hold at least one match. The field term locations of the remaining
// constituents are merged into it via search.MergeFieldTermLocations.
func (s *DisjunctionQueryScorer) Score(ctx *search.SearchContext, constituents []*search.DocumentMatch, countMatch, countTotal int) *search.DocumentMatch {
	explain := s.options.Explain

	var total float64
	var children []*search.Explanation
	if explain {
		children = make([]*search.Explanation, len(constituents))
	}
	for idx, match := range constituents {
		total += match.Score
		if explain {
			children[idx] = match.Expl
		}
	}

	coord := float64(countMatch) / float64(countTotal)
	scaled := total * coord

	var expl *search.Explanation
	if explain {
		expl = &search.Explanation{
			Value:   scaled,
			Message: "product of:",
			Children: []*search.Explanation{
				{Value: total, Message: "sum of:", Children: children},
				{Value: coord, Message: fmt.Sprintf("coord(%d/%d)", countMatch, countTotal)},
			},
			PartialMatch: countMatch != countTotal,
		}
	}

	// reuse constituents[0] as the return value
	first := constituents[0]
	first.Score = scaled
	first.Expl = expl
	first.FieldTermLocations = search.MergeFieldTermLocations(
		first.FieldTermLocations, constituents[1:])
	return first
}
// ScoreAndExplBreakdown is used only when a disjunction searcher runs over
// multiple KNN searchers, where just the per-searcher score breakdown (and,
// optionally, the explanation breakdown) is needed. The final score and
// explanation are set later, when the KNN hits are finalized.
//
// Each constituent's score is recorded in the ScoreBreakdown map of the
// first constituent, which is reused as the return value. The map key is
// the position of the searcher that produced the match: matchingIdxs[i]
// directly, or originalPositions[matchingIdxs[i]] when originalPositions
// is non-nil.
func (s *DisjunctionQueryScorer) ScoreAndExplBreakdown(ctx *search.SearchContext, constituents []*search.DocumentMatch,
	matchingIdxs []int, originalPositions []int, countTotal int) *search.DocumentMatch {
	result := constituents[0]
	if result.ScoreBreakdown == nil {
		result.ScoreBreakdown = make(map[int]float64, len(constituents))
	}

	explain := s.options.Explain
	var breakdown *search.Explanation
	if explain {
		// one slot per searcher in the disjunction, so that each explanation
		// can be attributed to the searcher that matched
		breakdown = &search.Explanation{Children: make([]*search.Explanation, countTotal)}
	}

	for pos, match := range constituents {
		// scorer used in disjunction heap searcher: the index is used as-is
		slot := matchingIdxs[pos]
		if originalPositions != nil {
			// scorer used in disjunction slice searcher: map back to the
			// searcher's original position
			slot = originalPositions[slot]
		}
		result.ScoreBreakdown[slot] = match.Score
		if explain {
			breakdown.Children[slot] = match.Expl
		}
	}

	result.Expl = breakdown
	result.FieldTermLocations = search.MergeFieldTermLocations(
		result.FieldTermLocations, constituents[1:])
	return result
}
================================================
FILE: search/scorer/scorer_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package scorer
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeKNNQueryScorer caches the size reported by reflect for a
// KNNQueryScorer value; it is computed once at package initialization.
var reflectStaticSizeKNNQueryScorer int

func init() {
	reflectStaticSizeKNNQueryScorer = int(reflect.TypeOf(KNNQueryScorer{}).Size())
}
// KNNQueryScorer turns vector matches for a single kNN query on one field
// into scored DocumentMatch values.
type KNNQueryScorer struct {
// queryVector is the query vector passed to NewKNNQueryScorer.
queryVector []float32
// queryField is the name of the field being searched.
queryField string
// queryWeight starts at 1.0 and is recomputed as queryBoost * queryNorm
// by SetQueryNorm.
queryWeight float64
// queryBoost is the boost passed to NewKNNQueryScorer.
queryBoost float64
// queryNorm is the last value passed to SetQueryNorm.
queryNorm float64
// options carries the searcher options; Explain enables explanations.
options search.SearcherOptions
// similarityMetric names the metric; Score inverts the raw score when it
// equals index.EuclideanDistance.
similarityMetric string
// queryWeightExplanation is built by SetQueryNorm when Explain is enabled.
queryWeightExplanation *search.Explanation
}
// Size returns the number of bytes attributed to this scorer: its static
// size plus a pointer, the query vector, the field and metric strings, and
// the query weight explanation when one has been built.
func (s *KNNQueryScorer) Size() int {
	total := reflectStaticSizeKNNQueryScorer + size.SizeOfPtr
	total += len(s.queryVector) * size.SizeOfFloat32
	total += len(s.queryField)
	total += len(s.similarityMetric)
	if expl := s.queryWeightExplanation; expl != nil {
		total += expl.Size()
	}
	return total
}
// NewKNNQueryScorer builds a scorer for the given query vector and field.
// The query weight starts out at 1.0 and is only changed by SetQueryNorm.
func NewKNNQueryScorer(queryVector []float32, queryField string, queryBoost float64,
	options search.SearcherOptions,
	similarityMetric string) *KNNQueryScorer {
	scorer := new(KNNQueryScorer)
	scorer.queryVector = queryVector
	scorer.queryField = queryField
	scorer.queryBoost = queryBoost
	scorer.queryWeight = 1.0
	scorer.options = options
	scorer.similarityMetric = similarityMetric
	return scorer
}
// maxKNNScore is the score used when knnMatch.Score is 0 under the euclidean
// distance metric, i.e. the query and indexed vector are exactly the same.
// It stands in for the inverted distance, which cannot be computed for 0.
const maxKNNScore = math.MaxFloat32
// Score builds a DocumentMatch, taken from the context's pool, for one
// vector match. Euclidean distances are inverted (an exact match with
// distance 0 becomes maxKNNScore), the query weight is applied when it is not
// 1 and the score is not maxKNNScore, and an explanation tree is attached
// when explanations are enabled.
func (sqs *KNNQueryScorer) Score(ctx *search.SearchContext,
	knnMatch *index.VectorDoc) *search.DocumentMatch {
	match := ctx.DocumentMatchPool.Get()
	explain := sqs.options.Explain

	similarity := knnMatch.Score
	if sqs.similarityMetric == index.EuclideanDistance {
		if similarity != 0 {
			// euclidean distances need to be inverted to work with
			// tf-idf scoring
			similarity = 1.0 / similarity
		} else {
			// distance 0 means a perfect match
			similarity = maxKNNScore
		}
	}

	var expl *search.Explanation
	if explain {
		vectorExpl := &search.Explanation{
			Value: similarity,
			Message: fmt.Sprintf("vector(field(%s:%s) with similarity_metric(%s)=%e",
				sqs.queryField, knnMatch.ID, sqs.similarityMetric, similarity),
		}
		expl = &search.Explanation{
			Value: similarity,
			Message: fmt.Sprintf("fieldWeight(%s in doc %s), score of:",
				sqs.queryField, knnMatch.ID),
			Children: []*search.Explanation{vectorExpl},
		}
	}

	// the weight only matters when it differs from 1, and is never applied
	// to the perfect-match sentinel score
	applyWeight := sqs.queryWeight != 1.0 && similarity != maxKNNScore
	if applyWeight {
		similarity = similarity * sqs.queryWeight
		if explain {
			// The query vector is deliberately left out of the message since
			// vectors can get quite large.
			expl = &search.Explanation{
				Value: similarity,
				Message: fmt.Sprintf("weight(%s:query Vector^%f in %s), product of:",
					sqs.queryField, sqs.queryBoost, knnMatch.ID),
				Children: []*search.Explanation{sqs.queryWeightExplanation, expl},
			}
		}
	}

	match.Score = similarity
	if explain {
		match.Expl = expl
	}
	match.IndexInternalID = index.NewIndexInternalIDFrom(match.IndexInternalID, knnMatch.ID)
	return match
}
// Weight returns the weight of this scorer, which is always 1.0 regardless
// of the configured boost.
func (sqs *KNNQueryScorer) Weight() float64 {
return 1.0
}
// SetQueryNorm stores the query norm and recomputes the query weight as
// boost * norm. When explanations are enabled it also rebuilds the query
// weight explanation from those two factors.
func (sqs *KNNQueryScorer) SetQueryNorm(qnorm float64) {
	sqs.queryNorm = qnorm
	sqs.queryWeight = sqs.queryBoost * sqs.queryNorm
	if !sqs.options.Explain {
		return
	}
	sqs.queryWeightExplanation = &search.Explanation{
		Value: sqs.queryWeight,
		Message: fmt.Sprintf("queryWeight(%s:query Vector^%f), product of:",
			sqs.queryField, sqs.queryBoost),
		Children: []*search.Explanation{
			{Value: sqs.queryBoost, Message: "boost"},
			{Value: sqs.queryNorm, Message: "queryNorm"},
		},
	}
}
================================================
FILE: search/scorer/scorer_knn_test.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package scorer
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestKNNScorerExplanation checks the score and the explanation tree that
// KNNQueryScorer.Score produces for euclidean and inner-product metrics,
// with and without a query norm that changes the query weight.
func TestKNNScorerExplanation(t *testing.T) {
	var queryVector []float32
	// arbitrary vector of dims: 64
	for i := 0; i < 64; i++ {
		queryVector = append(queryVector, float32(i))
	}
	var resVector []float32
	// arbitrary res vector.
	for i := 0; i < 64; i++ {
		resVector = append(resVector, float32(i))
	}
	tests := []struct {
		vectorMatch *index.VectorDoc
		scorer      *KNNQueryScorer
		norm        float64
		result      *search.DocumentMatch
	}{
		{
			vectorMatch: &index.VectorDoc{
				ID:     index.IndexInternalID("one"),
				Score:  0.5,
				Vector: resVector,
			},
			norm: 1.0,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.EuclideanDistance),
			// Specifically testing EuclideanDistance since that involves score inversion.
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				// the raw distance 0.5 is inverted by the scorer
				Score: 1 / 0.5,
				Expl: &search.Explanation{
					Value:   1 / 0.5,
					Message: "fieldWeight(desc in doc one), score of:",
					Children: []*search.Explanation{
						{
							Value:   1 / 0.5,
							Message: "vector(field(desc:one) with similarity_metric(l2_norm)=2.000000e+00",
						},
					},
				},
			},
		},
		{
			vectorMatch: &index.VectorDoc{
				ID:    index.IndexInternalID("one"),
				Score: 0.0,
				// Result vector is an exact match of an existing vector.
				Vector: queryVector,
			},
			norm: 1.0,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.EuclideanDistance),
			// Specifically testing EuclideanDistance with 0 score.
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				// a zero distance is mapped to the maximum KNN score
				Score: maxKNNScore,
				Expl: &search.Explanation{
					Value:   maxKNNScore,
					Message: "fieldWeight(desc in doc one), score of:",
					Children: []*search.Explanation{
						{
							Value:   maxKNNScore,
							Message: "vector(field(desc:one) with similarity_metric(l2_norm)=3.402823e+38",
						},
					},
				},
			},
		},
		{
			vectorMatch: &index.VectorDoc{
				ID:     index.IndexInternalID("one"),
				Score:  0.5,
				Vector: resVector,
			},
			norm: 1.0,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.InnerProduct),
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				Score:           0.5,
				Expl: &search.Explanation{
					Value:   0.5,
					Message: "fieldWeight(desc in doc one), score of:",
					Children: []*search.Explanation{
						{
							Value:   0.5,
							Message: "vector(field(desc:one) with similarity_metric(dot_product)=5.000000e-01",
						},
					},
				},
			},
		},
		{
			vectorMatch: &index.VectorDoc{
				ID:     index.IndexInternalID("one"),
				Score:  0.25,
				Vector: resVector,
			},
			norm: 0.5,
			scorer: NewKNNQueryScorer(queryVector, "desc", 1.0,
				search.SearcherOptions{Explain: true}, index.InnerProduct),
			result: &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID("one"),
				// raw score 0.25 multiplied by the query weight (boost 1 * norm 0.5)
				Score: 0.125,
				Expl: &search.Explanation{
					Value:   0.125,
					Message: "weight(desc:query Vector^1.000000 in one), product of:",
					Children: []*search.Explanation{
						{
							Value:   0.5,
							Message: "queryWeight(desc:query Vector^1.000000), product of:",
							Children: []*search.Explanation{
								{
									Value:   1,
									Message: "boost",
								},
								{
									Value:   0.5,
									Message: "queryNorm",
								},
							},
						},
						{
							Value:   0.25,
							Message: "fieldWeight(desc in doc one), score of:",
							Children: []*search.Explanation{
								{
									Value:   0.25,
									Message: "vector(field(desc:one) with similarity_metric(dot_product)=2.500000e-01",
								},
							},
						},
					},
				},
			},
		},
	}
	for _, test := range tests {
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
		}
		test.scorer.SetQueryNorm(test.norm)
		actual := test.scorer.Score(ctx, test.vectorMatch)
		actual.Complete(nil)
		// the expected score is now verified too, not only the explanation
		if actual.Score != test.result.Score {
			t.Errorf("expected score %v got %v for %#v", test.result.Score,
				actual.Score, test.vectorMatch)
		}
		if !reflect.DeepEqual(actual.Expl, test.result.Expl) {
			t.Errorf("expected %#v got %#v for %#v", test.result.Expl,
				actual.Expl, test.vectorMatch)
		}
	}
}
================================================
FILE: search/scorer/scorer_term.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTermQueryScorer caches the size reported by reflect for a
// TermQueryScorer value; it is computed once at package initialization.
var reflectStaticSizeTermQueryScorer int

func init() {
	reflectStaticSizeTermQueryScorer = int(reflect.TypeOf(TermQueryScorer{}).Size())
}
// TermQueryScorer scores the matches of a single term in a single field,
// using bm25 when avgDocLength is positive and tf-idf otherwise.
type TermQueryScorer struct {
// queryTerm is the term being scored.
queryTerm string
// queryField is the field in which the term is searched.
queryField string
// queryBoost is the boost applied to the query term.
queryBoost float64
docTerm uint64 // number of documents containing the term
docTotal uint64 // total number of documents in the index
// avgDocLength is the average document length; a positive value selects
// bm25 scoring.
avgDocLength float64
// idf is computed once in NewTermQueryScorer via computeIDF.
idf float64
// options carries the searcher options; Explain enables explanations.
options search.SearcherOptions
// idfExplanation is built in NewTermQueryScorer when Explain is enabled.
idfExplanation *search.Explanation
// includeScore is false when options.Score is "none".
includeScore bool
// queryNorm is the last value passed to SetQueryNorm.
queryNorm float64
// queryWeight starts at 1.0 and is recomputed as
// queryBoost * idf * queryNorm by SetQueryNorm.
queryWeight float64
// queryWeightExplanation is built by SetQueryNorm when Explain is enabled.
queryWeightExplanation *search.Explanation
}
// Size returns the number of bytes attributed to this scorer: its static
// size plus a pointer, the term and field strings, and whichever of the idf
// and query weight explanations have been built.
func (s *TermQueryScorer) Size() int {
	total := reflectStaticSizeTermQueryScorer + size.SizeOfPtr
	total += len(s.queryTerm)
	total += len(s.queryField)
	if expl := s.idfExplanation; expl != nil {
		total += expl.Size()
	}
	if expl := s.queryWeightExplanation; expl != nil {
		total += expl.Size()
	}
	return total
}
// computeIDF returns the inverse document frequency for a term found in
// docTerm out of docTotal documents. A positive avgDocLength selects the
// bm25 formula; otherwise the tf-idf formula is used.
func (s *TermQueryScorer) computeIDF(avgDocLength float64, docTotal, docTerm uint64) float64 {
	if avgDocLength > 0 {
		// avgDocLength is set only for bm25 scoring
		return math.Log(1 + (float64(docTotal)-float64(docTerm)+0.5)/
			(float64(docTerm)+0.5))
	}
	return 1.0 + math.Log(float64(docTotal)/
		float64(docTerm+1.0))
}
// NewTermQueryScorer builds a scorer for one term in one field.
//
// queryTerm - the specific term being scored by this scorer object
// queryField - the field in which the term is being searched
// queryBoost - the boost value for the query term
// docTotal - total number of documents in the index
// docTerm - number of documents containing the term
// avgDocLength - average document length in the index
// options - search options such as explain scoring, include the location of the term etc.
func NewTermQueryScorer(queryTerm []byte, queryField string, queryBoost float64, docTotal,
	docTerm uint64, avgDocLength float64, options search.SearcherOptions) *TermQueryScorer {
	scorer := &TermQueryScorer{
		queryTerm:    string(queryTerm),
		queryField:   queryField,
		queryBoost:   queryBoost,
		docTerm:      docTerm,
		docTotal:     docTotal,
		avgDocLength: avgDocLength,
		options:      options,
		queryWeight:  1.0,
		includeScore: options.Score != "none",
	}
	scorer.idf = scorer.computeIDF(avgDocLength, docTotal, docTerm)
	if !options.Explain {
		return scorer
	}
	scorer.idfExplanation = &search.Explanation{
		Value:   scorer.idf,
		Message: fmt.Sprintf("idf(docFreq=%d, maxDocs=%d)", docTerm, docTotal),
	}
	return scorer
}
// Weight returns the square of boost * idf for this term.
func (s *TermQueryScorer) Weight() float64 {
	boostedIDF := s.queryBoost * s.idf
	return boostedIDF * boostedIDF
}
// SetQueryNorm stores the query norm and recomputes the query weight as
// boost * idf * norm. When explanations are enabled it also rebuilds the
// query weight explanation from those three factors.
func (s *TermQueryScorer) SetQueryNorm(qnorm float64) {
	s.queryNorm = qnorm
	s.queryWeight = s.queryBoost * s.idf * s.queryNorm
	if !s.options.Explain {
		return
	}
	factors := []*search.Explanation{
		{Value: s.queryBoost, Message: "boost"},
		s.idfExplanation,
		{Value: s.queryNorm, Message: "queryNorm"},
	}
	s.queryWeightExplanation = &search.Explanation{
		Value:    s.queryWeight,
		Message:  fmt.Sprintf("queryWeight(%s:%s^%f), product of:", s.queryField, s.queryTerm, s.queryBoost),
		Children: factors,
	}
}
// docScore computes the unweighted score of a document for the given term
// frequency factor and norm, and reports which scoring model produced it:
// bm25 when avgDocLength is positive, tf-idf otherwise.
func (s *TermQueryScorer) docScore(tf, norm float64) (score float64, model string) {
	if s.avgDocLength > 0 {
		// bm25: the posting's norm value is used to recompute the field
		// length for the document
		fieldLength := 1 / (norm * norm)
		return s.idf * (tf * search.BM25_k1) /
			(tf + search.BM25_k1*(1-search.BM25_b+(search.BM25_b*fieldLength/s.avgDocLength))), index.BM25Scoring
	}
	// tf-idf scoring by default
	return tf * norm * s.idf, index.DefaultScoringModel
}
// scoreExplanation returns the three child explanations of a field weight:
// the tf factor, a model-specific middle factor (saturation for bm25,
// fieldNorm for tf-idf) and the shared idf explanation.
func (s *TermQueryScorer) scoreExplanation(tf float64, termMatch *index.TermFieldDoc) []*search.Explanation {
	tfExpl := &search.Explanation{
		Value:   tf,
		Message: fmt.Sprintf("tf(termFreq(%s:%s)=%d", s.queryField, s.queryTerm, termMatch.Freq),
	}

	var modelExpl *search.Explanation
	if s.avgDocLength > 0 {
		// bm25: explain the length normalization and the resulting saturation
		fieldLength := 1 / (termMatch.Norm * termMatch.Norm)
		fieldNormVal := 1 - search.BM25_b + (search.BM25_b * fieldLength / s.avgDocLength)
		lengthNormExpl := &search.Explanation{
			Value: fieldNormVal,
			Message: fmt.Sprintf("fieldNorm(field=%s), b=%f, fieldLength=%f, avgFieldLength=%f)",
				s.queryField, search.BM25_b, fieldLength, s.avgDocLength),
		}
		modelExpl = &search.Explanation{
			Value: search.BM25_k1 / (tf + search.BM25_k1*fieldNormVal),
			Message: fmt.Sprintf("saturation(term:%s), k1=%f/(tf=%f + k1*fieldNorm=%f))",
				termMatch.Term, search.BM25_k1, tf, fieldNormVal),
			Children: []*search.Explanation{lengthNormExpl},
		}
	} else {
		// tf-idf: the stored norm is used as is
		modelExpl = &search.Explanation{
			Value:   termMatch.Norm,
			Message: fmt.Sprintf("fieldNorm(field=%s, doc=%s)", s.queryField, termMatch.ID),
		}
	}

	return []*search.Explanation{tfExpl, modelExpl, s.idfExplanation}
}
// Score builds a DocumentMatch, taken from the context's pool, for one term
// match. The score and explanation are computed only when scoring or
// explanations are enabled; the term vectors of the match, if any, are copied
// into the match's FieldTermLocations.
func (s *TermQueryScorer) Score(ctx *search.SearchContext, termMatch *index.TermFieldDoc) *search.DocumentMatch {
rv := ctx.DocumentMatchPool.Get()
// perform any score computations only when needed
if s.includeScore || s.options.Explain {
var scoreExplanation *search.Explanation
var tf float64
// tf is sqrt(freq); small frequencies are served from the precomputed table
if termMatch.Freq < MaxSqrtCache {
tf = SqrtCache[int(termMatch.Freq)]
} else {
tf = math.Sqrt(float64(termMatch.Freq))
}
score, scoringModel := s.docScore(tf, termMatch.Norm)
if s.options.Explain {
childrenExplanations := s.scoreExplanation(tf, termMatch)
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("fieldWeight(%s:%s in %s), as per %s model, "+
"product of:", s.queryField, s.queryTerm, termMatch.ID, scoringModel),
Children: childrenExplanations,
}
}
// if the query weight isn't 1, multiply
if s.queryWeight != 1.0 {
score = score * s.queryWeight
if s.options.Explain {
// wrap the field weight explanation together with the query weight
childExplanations := make([]*search.Explanation, 2)
childExplanations[0] = s.queryWeightExplanation
childExplanations[1] = scoreExplanation
scoreExplanation = &search.Explanation{
Value: score,
Message: fmt.Sprintf("weight(%s:%s^%f in %s), product of:", s.queryField, s.queryTerm, s.queryBoost, termMatch.ID),
Children: childExplanations,
}
}
}
// the score is stored only when scoring is enabled, even if it was
// computed for the sake of the explanation
if s.includeScore {
rv.Score = score
}
if s.options.Explain {
rv.Expl = scoreExplanation
}
}
rv.IndexInternalID = index.NewIndexInternalIDFrom(rv.IndexInternalID, termMatch.ID)
if len(termMatch.Vectors) > 0 {
// grow the locations slice up front when the recycled one is too small
if cap(rv.FieldTermLocations) < len(termMatch.Vectors) {
rv.FieldTermLocations = make([]search.FieldTermLocation, 0, len(termMatch.Vectors))
}
for _, v := range termMatch.Vectors {
var ap search.ArrayPositions
if len(v.ArrayPositions) > 0 {
n := len(rv.FieldTermLocations)
if n < cap(rv.FieldTermLocations) { // reuse ap slice if available
// peek at the slot just past the current length to pick up the
// ArrayPositions backing array left there by an earlier use
ap = rv.FieldTermLocations[:n+1][n].Location.ArrayPositions[:0]
}
ap = append(ap, v.ArrayPositions...)
}
rv.FieldTermLocations =
append(rv.FieldTermLocations, search.FieldTermLocation{
Field: v.Field,
Term: s.queryTerm,
Location: search.Location{
Pos: v.Pos,
Start: v.Start,
End: v.End,
ArrayPositions: ap,
},
})
}
}
return rv
}
================================================
FILE: search/scorer/scorer_term_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"math"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestTermScorer checks the tf-idf score, explanation and locations produced
// by TermQueryScorer.Score when no query norm has been set (query weight 1).
func TestTermScorer(t *testing.T) {
var docTotal uint64 = 100
var docTerm uint64 = 9
var queryTerm = []byte("beer")
var queryField = "desc"
var queryBoost = 1.0
// avgDocLength of 0 selects tf-idf scoring
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, 0, search.SearcherOptions{Explain: true})
// expected idf, mirroring the tf-idf branch of computeIDF
idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
// test some simple math
{
termMatch: &index.TermFieldDoc{
ID: index.IndexInternalID("one"),
Freq: 1,
Norm: 1.0,
Vectors: []*index.TermFieldVector{
{
Field: "desc",
Pos: 1,
Start: 0,
End: 4,
},
},
},
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf,
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), as per tf-idf model, product of:",
Children: []*search.Explanation{
{
Value: 1,
Message: "tf(termFreq(desc:beer)=1",
},
{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
Locations: search.FieldTermLocationMap{
"desc": search.TermLocationMap{
"beer": []*search.Location{
{
Pos: 1,
Start: 0,
End: 4,
},
},
},
},
},
},
// test the same thing again (score should be cached this time)
{
termMatch: &index.TermFieldDoc{
ID: index.IndexInternalID("one"),
Freq: 1,
Norm: 1.0,
},
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf,
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), as per tf-idf model, product of:",
Children: []*search.Explanation{
{
Value: 1,
Message: "tf(termFreq(desc:beer)=1",
},
{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
},
},
// test a case where the sqrt isn't precalculated
{
termMatch: &index.TermFieldDoc{
ID: index.IndexInternalID("one"),
Freq: 65,
Norm: 1.0,
},
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(65) * idf,
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(65) * idf,
Message: "fieldWeight(desc:beer in one), as per tf-idf model, product of:",
Children: []*search.Explanation{
{
Value: math.Sqrt(65),
Message: "tf(termFreq(desc:beer)=65",
},
{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
},
},
}
for _, test := range tests {
// a fresh pool per case so every match starts from a new DocumentMatch
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch)
actual.Complete(nil)
// normalize an empty (recycled) locations slice to nil so that it
// compares equal to the expected value under DeepEqual
if len(actual.FieldTermLocations) == 0 {
actual.FieldTermLocations = nil
}
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}
// TestTermScorerWithQueryNorm checks Weight and the weighted score and
// explanation produced once a query norm has been set on the scorer.
func TestTermScorerWithQueryNorm(t *testing.T) {
var docTotal uint64 = 100
var docTerm uint64 = 9
var queryTerm = []byte("beer")
var queryField = "desc"
var queryBoost = 3.0
// avgDocLength of 0 selects tf-idf scoring
scorer := NewTermQueryScorer(queryTerm, queryField, queryBoost, docTotal, docTerm, 0, search.SearcherOptions{Explain: true})
// expected idf, mirroring the tf-idf branch of computeIDF
idf := 1.0 + math.Log(float64(docTotal)/float64(docTerm+1.0))
scorer.SetQueryNorm(2.0)
// Weight is (boost * idf) squared and does not include the query norm
expectedQueryWeight := 3 * idf * 3 * idf
actualQueryWeight := scorer.Weight()
if expectedQueryWeight != actualQueryWeight {
t.Errorf("expected query weight %f, got %f", expectedQueryWeight, actualQueryWeight)
}
tests := []struct {
termMatch *index.TermFieldDoc
result *search.DocumentMatch
}{
{
termMatch: &index.TermFieldDoc{
ID: index.IndexInternalID("one"),
Freq: 1,
Norm: 1.0,
},
result: &search.DocumentMatch{
IndexInternalID: index.IndexInternalID("one"),
Score: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Sort: []string{},
Expl: &search.Explanation{
Value: math.Sqrt(1.0) * idf * 3.0 * idf * 2.0,
Message: "weight(desc:beer^3.000000 in one), product of:",
Children: []*search.Explanation{
{
Value: 2.0 * idf * 3.0,
Message: "queryWeight(desc:beer^3.000000), product of:",
Children: []*search.Explanation{
{
Value: 3,
Message: "boost",
},
{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
{
Value: 2,
Message: "queryNorm",
},
},
},
{
Value: math.Sqrt(1.0) * idf,
Message: "fieldWeight(desc:beer in one), as per tf-idf model, product of:",
Children: []*search.Explanation{
{
Value: 1,
Message: "tf(termFreq(desc:beer)=1",
},
{
Value: 1,
Message: "fieldNorm(field=desc, doc=one)",
},
{
Value: idf,
Message: "idf(docFreq=9, maxDocs=100)",
},
},
},
},
},
},
},
}
for _, test := range tests {
// a fresh pool per case so every match starts from a new DocumentMatch
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
}
actual := scorer.Score(ctx, test.termMatch)
if !reflect.DeepEqual(actual, test.result) {
t.Errorf("expected %#v got %#v for %#v", test.result, actual, test.termMatch)
}
}
}
================================================
FILE: search/scorer/sqrt_cache.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package scorer
import (
"math"
)
// SqrtCache holds the square roots of the integers 0 through
// MaxSqrtCache-1, indexed by the integer itself.
var SqrtCache []float64

// MaxSqrtCache is the number of entries precomputed in SqrtCache.
const MaxSqrtCache = 64

func init() {
	table := make([]float64, MaxSqrtCache)
	for n := range table {
		table[n] = math.Sqrt(float64(n))
	}
	SqrtCache = table
}
================================================
FILE: search/search.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"fmt"
"reflect"
"slices"
"sort"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// Sizes reported by reflect for the corresponding types, computed once at
// package initialization and used by the Size methods.
var (
	reflectStaticSizeDocumentMatch int
	reflectStaticSizeSearchContext int
	reflectStaticSizeLocation      int
)

func init() {
	reflectStaticSizeDocumentMatch = int(reflect.TypeOf(DocumentMatch{}).Size())
	reflectStaticSizeSearchContext = int(reflect.TypeOf(SearchContext{}).Size())
	reflectStaticSizeLocation = int(reflect.TypeOf(Location{}).Size())
}
// ArrayPositions is a sequence of array indexes associated with a Location.
type ArrayPositions []uint64

// Equals reports whether ap and other have the same length and the same
// elements in the same order.
func (ap ArrayPositions) Equals(other ArrayPositions) bool {
	return slices.Equal(ap, other)
}

// Compare orders ap against other lexicographically: elements are compared
// pairwise from the start, and when one sequence is a prefix of the other the
// shorter one sorts first. It returns -1 if ap < other, 0 if they are equal
// and 1 if ap > other.
func (ap ArrayPositions) Compare(other ArrayPositions) int {
	// slices.Compare implements exactly this ordering, and keeps Compare
	// consistent with Equals, which already relies on the slices package
	return slices.Compare(ap, other)
}
// Location describes one occurrence of a term within a field.
type Location struct {
// Pos is the position of the term within the field, starting at 1
Pos uint64 `json:"pos"`
// Start and End are the byte offsets of the term in the field
Start uint64 `json:"start"`
End uint64 `json:"end"`
// ArrayPositions contains the positions of the term within any elements.
ArrayPositions ArrayPositions `json:"array_positions"`
}
// Size returns the number of bytes attributed to this location: its static
// size plus a pointer and the storage of its array positions.
func (l *Location) Size() int {
	arrayPositionsBytes := len(l.ArrayPositions) * size.SizeOfUint64
	return reflectStaticSizeLocation + size.SizeOfPtr + arrayPositionsBytes
}
// Locations is a sortable list of locations, ordered by array positions
// first and by term position second.
type Locations []*Location

// Len returns the number of locations.
func (p Locations) Len() int {
	return len(p)
}

// Swap exchanges the locations at indexes i and j.
func (p Locations) Swap(i, j int) {
	p[j], p[i] = p[i], p[j]
}

// Less reports whether the location at index i sorts before the one at j.
func (p Locations) Less(i, j int) bool {
	left, right := p[i], p[j]
	if order := left.ArrayPositions.Compare(right.ArrayPositions); order != 0 {
		return order < 0
	}
	// same array positions: fall back to the term position
	return left.Pos < right.Pos
}
// Dedupe sorts p in place and collapses runs of adjacent identical
// locations, returning the shortened slice. It is destructive: the receiver's
// backing array is reordered and overwritten.
func (p Locations) Dedupe() Locations { // destructive!
	if len(p) <= 1 {
		return p
	}
	sort.Sort(p)
	kept := 0 // index of the last location retained so far
	for next := 1; next < len(p); next++ {
		candidate, retained := p[next], p[kept]
		duplicate := retained.Pos == candidate.Pos &&
			retained.Start == candidate.Start &&
			retained.End == candidate.End &&
			retained.ArrayPositions.Equals(candidate.ArrayPositions)
		if duplicate {
			continue
		}
		kept++
		p[kept] = candidate
	}
	return p[:kept+1]
}
// TermLocationMap maps a term to the locations at which it occurs.
type TermLocationMap map[string]Locations

// AddLocation appends location to the list kept for term.
func (t TermLocationMap) AddLocation(term string, location *Location) {
	existing := t[term]
	t[term] = append(existing, location)
}
// FieldTermLocationMap maps a field name to the term locations found in it.
type FieldTermLocationMap map[string]TermLocationMap
// FieldTermLocation is the flat form of one term occurrence: the field, the
// term and its location, held by value.
type FieldTermLocation struct {
Field string
Term string
Location Location
}
// FieldFragmentMap maps a field name to its list of fragments.
type FieldFragmentMap map[string][]string
// DocumentMatch holds everything recorded about a single document that
// matched a search: its identifiers, score, optional explanation, term
// locations, fragments, sort keys and stored field values.
type DocumentMatch struct {
Index string `json:"index,omitempty"`
ID string `json:"id"`
// IndexInternalID is not serialized; Reset keeps its backing array for reuse.
IndexInternalID index.IndexInternalID `json:"-"`
Score float64 `json:"score"`
Expl *Explanation `json:"explanation,omitempty"`
// Locations is populated from FieldTermLocations by Complete.
Locations FieldTermLocationMap `json:"locations,omitempty"`
Fragments FieldFragmentMap `json:"fragments,omitempty"`
Sort []string `json:"sort,omitempty"`
DecodedSort []string `json:"decoded_sort,omitempty"`
// Fields contains the values for document fields listed in
// SearchRequest.Fields. Text fields are returned as strings, numeric
// fields as float64s and date fields as strings.
Fields map[string]interface{} `json:"fields,omitempty"`
// used to maintain natural index order
HitNumber uint64 `json:"-"`
// used to temporarily hold field term location information during
// search processing in an efficient, recycle-friendly manner, to
// be later incorporated into the Locations map when search
// results are completed
FieldTermLocations []FieldTermLocation `json:"-"`
// used to indicate the sub-scores that combined to form the
// final score for this document match. This is only populated
// when the search request's query is a DisjunctionQuery.
// The map key is the index of the sub-query
// in the DisjunctionQuery. The map value is the
// sub-score for that sub-query.
ScoreBreakdown map[int]float64 `json:"score_breakdown,omitempty"`
// internal variable used in PreSearch phase of search in alias
// to indicate the name of the index that this match came from.
// used in knn search.
// it is a stack of index names, the top of the stack is the name
// of the index that this match came from
// of the current alias view, used in alias of aliases scenario
IndexNames []string `json:"index_names,omitempty"`
// Descendants holds the IDs of any child/descendant document that contributed
// to this root DocumentMatch.
Descendants []index.IndexInternalID `json:"-"`
}
// AddFieldValue records value under the field name. The first value for a
// name is stored as is; a second value turns the entry into a slice holding
// both, and further values are appended to that slice.
func (dm *DocumentMatch) AddFieldValue(name string, value interface{}) {
	if dm.Fields == nil {
		dm.Fields = make(map[string]interface{})
	}
	previous, seen := dm.Fields[name]
	if !seen {
		dm.Fields[name] = value
		return
	}
	if values, isSlice := previous.([]interface{}); isSlice {
		// already a slice, append to it
		dm.Fields[name] = append(values, value)
		return
	}
	// second value for this name: promote the entry to a slice
	dm.Fields[name] = []interface{}{previous, value}
}
// AddFragments appends the given fragments to those already stored for
// field, skipping any fragment that is already present (including ones added
// earlier in the same call).
func (dm *DocumentMatch) AddFragments(field string, fragments []string) {
	if dm.Fragments == nil {
		dm.Fragments = make(FieldFragmentMap)
	}
	for _, candidate := range fragments {
		// no duplicates allowed
		if slices.Contains(dm.Fragments[field], candidate) {
			continue
		}
		dm.Fragments[field] = append(dm.Fragments[field], candidate)
	}
}
// Reset allows an already allocated DocumentMatch to be reused. All fields
// are zeroed, but the backing storage of IndexInternalID, Sort, DecodedSort,
// FieldTermLocations (and each location's ArrayPositions), Descendants and
// the ScoreBreakdown map is retained, emptied, for the next use.
func (dm *DocumentMatch) Reset() *DocumentMatch {
// remember the []byte used for the IndexInternalID
indexInternalID := dm.IndexInternalID
// remember the []string used for sort
sort := dm.Sort
// remember the []string used for decoded sort
decodedSort := dm.DecodedSort
// remember the FieldTermLocations backing array
ftls := dm.FieldTermLocations
for i := range ftls { // recycle the ArrayPositions of each location
ftls[i].Location.ArrayPositions = ftls[i].Location.ArrayPositions[:0]
}
// remember the score breakdown map
scoreBreakdown := dm.ScoreBreakdown
// clear out the score breakdown map
clear(scoreBreakdown)
// remember the Descendants backing array
descendants := dm.Descendants
for i := range descendants { // recycle each IndexInternalID
descendants[i] = descendants[i][:0]
}
// idiom to copy over from empty DocumentMatch (0 allocations)
*dm = DocumentMatch{}
// reuse the []byte already allocated (and reset len to 0)
dm.IndexInternalID = indexInternalID[:0]
// reuse the []string already allocated (and reset len to 0)
dm.Sort = sort[:0]
// reuse the []string already allocated (and reset len to 0)
dm.DecodedSort = decodedSort[:0]
// reuse the FieldTermLocations already allocated (and reset len to 0)
dm.FieldTermLocations = ftls[:0]
// reuse the Descendants already allocated (and reset len to 0)
dm.Descendants = descendants[:0]
// reuse the score breakdown map already allocated (after clearing it)
dm.ScoreBreakdown = scoreBreakdown
return dm
}
// Size returns the number of bytes attributed to this match: its static
// size plus a pointer, the identifier strings, the explanation, and the
// contents of the locations, fragments, sort keys and field names.
func (dm *DocumentMatch) Size() int {
	// cost of a string header plus its bytes
	stringCost := func(s string) int {
		return size.SizeOfString + len(s)
	}

	total := reflectStaticSizeDocumentMatch + size.SizeOfPtr +
		len(dm.Index) +
		len(dm.ID) +
		len(dm.IndexInternalID)
	if dm.Expl != nil {
		total += dm.Expl.Size()
	}

	for field, byTerm := range dm.Locations {
		total += stringCost(field)
		for term, locs := range byTerm {
			total += stringCost(term) + size.SizeOfSlice
			for _, loc := range locs {
				total += loc.Size()
			}
		}
	}

	for field, frags := range dm.Fragments {
		total += stringCost(field) + size.SizeOfSlice
		for _, frag := range frags {
			total += stringCost(frag)
		}
	}

	for _, key := range dm.Sort {
		total += stringCost(key)
	}
	for _, key := range dm.DecodedSort {
		total += stringCost(key)
	}

	// only the field names and a pointer per value are counted
	for name := range dm.Fields {
		total += stringCost(name) + size.SizeOfPtr
	}
	return total
}
// Complete performs final preparation & transformation of the
// DocumentMatch at the end of search processing, also allowing the
// caller to provide an optional preallocated locations slice.
// It returns the (possibly newly allocated) slice backing the Location
// values that the Locations map now points into.
func (dm *DocumentMatch) Complete(prealloc []Location) []Location {
// transform the FieldTermLocations slice into the Locations map
nlocs := len(dm.FieldTermLocations)
if nlocs > 0 {
// make sure there is one Location slot per field term location
if cap(prealloc) < nlocs {
prealloc = make([]Location, nlocs)
}
prealloc = prealloc[:nlocs]
var lastField string
var tlm TermLocationMap
var needsDedupe bool
for i, ftl := range dm.FieldTermLocations {
// look up (or create) the per-field term map only when the field changes
if i == 0 || lastField != ftl.Field {
lastField = ftl.Field
if dm.Locations == nil {
dm.Locations = make(FieldTermLocationMap)
}
tlm = dm.Locations[ftl.Field]
if tlm == nil {
tlm = make(TermLocationMap)
dm.Locations[ftl.Field] = tlm
}
}
loc := &prealloc[i]
*loc = ftl.Location
// detach the array positions from the recyclable backing array
if len(loc.ArrayPositions) > 0 { // copy
loc.ArrayPositions = append(ArrayPositions(nil), loc.ArrayPositions...)
}
locs := tlm[ftl.Term]
// if the loc is before or at the last location, then there
// might be duplicates that need to be deduplicated
if !needsDedupe && len(locs) > 0 {
last := locs[len(locs)-1]
cmp := loc.ArrayPositions.Compare(last.ArrayPositions)
if cmp < 0 || (cmp == 0 && loc.Pos <= last.Pos) {
needsDedupe = true
}
}
tlm[ftl.Term] = append(locs, loc)
// blank the consumed entry but keep its ArrayPositions backing array
dm.FieldTermLocations[i] = FieldTermLocation{ // recycle
Location: Location{
ArrayPositions: ftl.Location.ArrayPositions[:0],
},
}
}
// dedupe every term's locations, across all fields, once any
// out-of-order or repeated location has been seen
if needsDedupe {
for _, tlm := range dm.Locations {
for term, locs := range tlm {
tlm[term] = locs.Dedupe()
}
}
}
}
dm.FieldTermLocations = dm.FieldTermLocations[:0] // recycle
return prealloc
}
// String renders the match as "[<ID>-<Score>]", with the score formatted
// using the %f verb.
func (dm *DocumentMatch) String() string {
	id, score := dm.ID, dm.Score
	return fmt.Sprintf("[%s-%f]", id, score)
}
// DocumentMatchCollection is a slice of document matches. Its Len, Swap and
// Less methods have the shape required by sort.Interface and order matches
// by descending Score.
type DocumentMatchCollection []*DocumentMatch

// Len reports the number of matches in the collection.
func (c DocumentMatchCollection) Len() int {
	return len(c)
}

// Swap exchanges the matches at positions i and j.
func (c DocumentMatchCollection) Swap(i, j int) {
	c[j], c[i] = c[i], c[j]
}

// Less reports whether the match at i has a strictly higher score than the
// match at j, so that sorting places the best scores first.
func (c DocumentMatchCollection) Less(i, j int) bool {
	return c[j].Score < c[i].Score
}
// Searcher is the interface implemented by the searchers in this package
// tree. Only the method set is visible here; the notes on individual methods
// below are inferred from names and signatures and should be confirmed
// against the implementations.
type Searcher interface {
// Next returns a document match or an error.
// NOTE(review): presumably the next match in iteration order, with a nil
// match signalling exhaustion — confirm against implementations.
Next(ctx *SearchContext) (*DocumentMatch, error)
// Advance takes a target internal document ID and returns a document match
// or an error.
// NOTE(review): presumably skips forward to the first match at or after
// ID — confirm.
Advance(ctx *SearchContext, ID index.IndexInternalID) (*DocumentMatch, error)
// Close releases the searcher; any failure is reported as an error.
Close() error
// Weight and SetQueryNorm expose scoring-related values.
// NOTE(review): exact scoring semantics are not visible here — confirm.
Weight() float64
SetQueryNorm(float64)
// NOTE(review): Count presumably reports the number of candidate matches
// and Min a minimum-match requirement — confirm.
Count() uint64
Min() int
// Size returns an int; by analogy with DocumentMatch.Size this is
// presumably a byte size — confirm.
Size() int
// NOTE(review): DocumentMatchPoolSize presumably reports how many
// DocumentMatch instances the searcher needs from the pool — confirm.
DocumentMatchPoolSize() int
}
// SearcherOptions groups the options passed when constructing searchers.
// NOTE(review): field meanings below are inferred from their names; confirm
// against the code that consumes them.
type SearcherOptions struct {
// Explain presumably requests score explanations on matches.
Explain bool
// IncludeTermVectors presumably requests term vector (location) data.
IncludeTermVectors bool
// Score is a string-valued scoring option; its accepted values are not
// visible in this part of the file.
Score string
}
// SearchContext represents the context around a single search
type SearchContext struct {
// DocumentMatchPool is the pool of DocumentMatch instances for this
// search; it may be nil (SearchContext.Size checks for that).
DocumentMatchPool *DocumentMatchPool
// Collector is the collector associated with this search.
Collector Collector
// IndexReader is the index reader associated with this search.
IndexReader index.IndexReader
}
// Size returns the number of bytes attributed to this SearchContext: the
// static sizes of the context and of a document match pool (each plus one
// pointer), plus the Size of every non-nil entry currently in the pool's
// avail list. A nil pool contributes only the static portion.
func (sc *SearchContext) Size() int {
	total := reflectStaticSizeSearchContext + size.SizeOfPtr +
		reflectStaticSizeDocumentMatchPool + size.SizeOfPtr

	pool := sc.DocumentMatchPool
	if pool == nil {
		return total
	}

	for _, dm := range pool.avail {
		if dm == nil {
			continue
		}
		total += dm.Size()
	}
	return total
}
// A NestedDocumentMatch is like a DocumentMatch but is used for nested
// documents. It carries no score or locations; it only holds field values
// and fragments, to be embedded in the parent DocumentMatch.
type NestedDocumentMatch struct {
// Fields maps field names to their values; omitted from JSON when empty.
Fields map[string]interface{} `json:"fields,omitempty"`
// Fragments holds the per-field fragments; omitted from JSON when empty.
Fragments FieldFragmentMap `json:"fragments,omitempty"`
}
// NewNestedDocumentMatch returns a pointer to a fresh NestedDocumentMatch
// whose Fields and Fragments are set to the supplied values. The arguments
// are stored as given, not copied.
func NewNestedDocumentMatch(fields map[string]interface{}, fragments FieldFragmentMap) *NestedDocumentMatch {
	ndm := new(NestedDocumentMatch)
	ndm.Fields = fields
	ndm.Fragments = fragments
	return ndm
}
================================================
FILE: search/search_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"encoding/json"
"reflect"
"testing"
)
// TestArrayPositionsCompare checks ArrayPositions.Compare against a table of
// operand pairs covering nil, empty, equal, differing-element and
// differing-length inputs.
func TestArrayPositionsCompare(t *testing.T) {
	type compareCase struct {
		a      []uint64
		b      []uint64
		expect int
	}

	cases := []compareCase{
		{nil, nil, 0},
		{[]uint64{}, []uint64{}, 0},
		{[]uint64{1}, []uint64{}, 1},
		{[]uint64{1}, []uint64{1}, 0},
		{[]uint64{}, []uint64{1}, -1},
		{[]uint64{0}, []uint64{1}, -1},
		{[]uint64{1}, []uint64{0}, 1},
		{[]uint64{1}, []uint64{1, 2}, -1},
		{[]uint64{1, 2}, []uint64{1}, 1},
		{[]uint64{1, 2}, []uint64{1, 2}, 0},
		{[]uint64{1, 2}, []uint64{1, 200}, -1},
		{[]uint64{1, 2}, []uint64{100, 2}, -1},
		{[]uint64{1, 2}, []uint64{1, 2, 3}, -1},
	}

	for idx := range cases {
		tc := cases[idx]
		if got := ArrayPositions(tc.a).Compare(tc.b); got != tc.expect {
			t.Errorf("test: %+v, res: %v", tc, got)
		}
	}
}
// TestLocationsDedupe checks that Locations.Dedupe yields the expected
// de-duplicated, ordered result for inputs built from three locations with
// Pos 0, 1 and 2.
func TestLocationsDedupe(t *testing.T) {
	var (
		a = &Location{}
		b = &Location{Pos: 1}
		c = &Location{Pos: 2}
	)

	type dedupeCase struct {
		input  Locations
		expect Locations
	}

	cases := []dedupeCase{
		{Locations{}, Locations{}},
		{Locations{a}, Locations{a}},
		{Locations{a, b, c}, Locations{a, b, c}},
		{Locations{a, a}, Locations{a}},
		{Locations{a, a, a}, Locations{a}},
		{Locations{a, b}, Locations{a, b}},
		{Locations{b, a}, Locations{a, b}},
		{Locations{c, b, a, c, b, a, c, b, a}, Locations{a, b, c}},
	}

	for n, tc := range cases {
		got := tc.input.Dedupe()
		if reflect.DeepEqual(got, tc.expect) {
			continue
		}
		t.Errorf("testi: %d, test: %+v, res: %+v", n, tc, got)
	}
}
// TestMarshallingHighTerm checks that HighTerm survives a JSON
// marshal/unmarshal round trip unchanged.
func TestMarshallingHighTerm(t *testing.T) {
	encoded, err := json.Marshal(HighTerm)
	if err != nil {
		t.Fatal(err)
	}

	var decoded string
	if err := json.Unmarshal(encoded, &decoded); err != nil {
		t.Fatal(err)
	}

	if decoded != HighTerm {
		t.Fatalf("unexpected %x != %x", decoded, HighTerm)
	}
}
================================================
FILE: search/searcher/base_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"math"
"regexp"
"github.com/blevesearch/bleve/v2/analysis"
regexpTokenizer "github.com/blevesearch/bleve/v2/analysis/tokenizer/regexp"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
index "github.com/blevesearch/bleve_index_api"
)
// twoDocIndex is the shared index fixture for tests in this package. It is
// populated from twoDocIndexDocs by init below.
var twoDocIndex index.Index
// init builds the shared fixture with initTwoDocUpsideDown, which panics if
// the index cannot be created, opened, or loaded.
func init() {
twoDocIndex = initTwoDocUpsideDown()
}
// initTwoDocUpsideDown creates an upsidedown index on the gtreap store with
// an empty "path", loads the shared test documents into it via initTwoDocs,
// and returns it. It panics if the index cannot be constructed.
func initTwoDocUpsideDown() index.Index {
	queue := index.NewAnalysisQueue(1)
	config := map[string]interface{}{
		"path": "",
	}

	idx, err := upsidedown.NewUpsideDownCouch(gtreap.Name, config, queue)
	if err != nil {
		panic(err)
	}

	initTwoDocs(idx)
	return idx
}
// initTwoDocScorch creates a scorch index whose "path" is dir, loads the
// shared test documents into it via initTwoDocs, and returns it. It panics
// if the index cannot be constructed.
func initTwoDocScorch(dir string) index.Index {
	queue := index.NewAnalysisQueue(1)
	config := map[string]interface{}{
		"path": dir,
	}

	idx, err := scorch.NewScorch(scorch.Name, config, queue)
	if err != nil {
		panic(err)
	}

	initTwoDocs(idx)
	return idx
}
// initTwoDocs opens the given index and writes every document from
// twoDocIndexDocs to it in a single batch. It panics if opening the index or
// applying the batch fails.
func initTwoDocs(twoDocIndex index.Index) {
	if err := twoDocIndex.Open(); err != nil {
		panic(err)
	}

	batch := index.NewBatch()
	for n := range twoDocIndexDocs {
		batch.Update(twoDocIndexDocs[n])
	}

	if err := twoDocIndex.Batch(batch); err != nil {
		panic(err)
	}
}
// testAnalyzer is a simpler analyzer which supports these tests: a
// DefaultAnalyzer with only a Tokenizer set, namely a regexp tokenizer built
// from the pattern `\w+`. The pattern is compiled once at package
// initialization.
var testAnalyzer = &analysis.DefaultAnalyzer{
Tokenizer: regexpTokenizer.NewRegexpTokenizer(regexp.MustCompile(`\w+`)),
}
// sets up some mock data used in many tests in this package
var twoDocIndexDescIndexingOptions = document.DefaultTextIndexingOptions | index.IncludeTermVectors
var twoDocIndexDocs = []*document.Document{
// must have 4/4 beer
document.NewDocument("1").
AddField(document.NewTextField("name", []uint64{}, []byte("marty"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("2").
AddField(document.NewTextField("name", []uint64{}, []byte("steve"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("angst beer couch database"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("street", []uint64{}, []byte("couchbase way"), testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 1/4 beer
document.NewDocument("3").
AddField(document.NewTextField("name", []uint64{}, []byte("dustin"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("apple beer column dank"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
// must have 65/65 beer
document.NewDocument("4").
AddField(document.NewTextField("name", []uint64{}, []byte("ravi"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer beer"), twoDocIndexDescIndexingOptions, testAnalyzer)),
// must have 0/x beer
document.NewDocument("5").
AddField(document.NewTextField("name", []uint64{}, []byte("bobert"))).
AddField(document.NewTextFieldCustom("desc", []uint64{}, []byte("water"), twoDocIndexDescIndexingOptions, testAnalyzer)).
AddField(document.NewTextFieldWithAnalyzer("title", []uint64{}, []byte("mister"), testAnalyzer)),
}
// scoresCloseEnough reports whether a and b differ by strictly less than
// 0.001 in absolute value.
func scoresCloseEnough(a, b float64) bool {
	const tolerance = 0.001
	delta := math.Abs(a - b)
	return delta < tolerance
}
================================================
FILE: search/searcher/geoshape_contains_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// Shape fixtures shared by the geoshape relation tests in this file.
// Coordinates are given as float pairs.
// NOTE(review): the second ring of leftRectWithHole contains {-0.75, -0.75}
// and ends at {-0.74, 0.25} rather than returning to its first vertex
// {-0.75, 0.25}; confirm whether this is intentional before relying on it as
// a well-formed hole.
var (
// two points used as a multipoint query/document shape
leftRectEdgeMultiPoint [][]float64 = [][]float64{{-1, 0.2}, {-0.9, 0.1}}
// outer ring spanning (-1,0)-(0,1) followed by a second (inner) ring
leftRectWithHole [][][]float64 = [][][]float64{
{{-1, 0}, {0, 0}, {0, 1}, {-1, 1}, {-1, 0}},
{{-0.75, 0.25}, {-0.75, -0.75}, {-0.25, 0.75}, {-0.25, 0.25}, {-0.74, 0.25}},
}
// a single point with x = -1, the x coordinate of the outer ring's left side
leftRectEdgePoint []float64 = []float64{-1, 0.2}
// two points, (0.5, 0.5) and (-0.9, 0.1)
leftRectMultiPoint [][]float64 = [][]float64{{0.5, 0.5}, {-0.9, 0.1}}
)
// testCaseSetup indexes a single document named docShapeName, carrying one
// geoshape field called "geometry" of type docShapeType built from
// docShapeVertices, into i and opens a reader over the index.
//
// It returns the reader together with a cleanup function that deletes the
// document again and closes the reader. If indexing the document fails, the
// error is returned and both the reader and the cleanup function are nil, so
// callers must stop on a non-nil error. Failure to open the reader aborts
// the calling test via t.Fatal.
func testCaseSetup(t *testing.T, docShapeName, docShapeType string, docShapeVertices [][][][]float64,
	i index.Index,
) (index.IndexReader, func() error, error) {
	t.Helper()

	doc := document.NewDocument(docShapeName)
	doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
		docShapeVertices, docShapeType, document.DefaultGeoShapeIndexingOptions))
	if err := i.Update(doc); err != nil {
		return nil, nil, err
	}

	indexReader, err := i.Reader()
	if err != nil {
		t.Fatal(err)
	}

	closeFn := func() error {
		// Always close the reader, even when the delete fails, so a failed
		// cleanup does not leak the reader. The delete error takes
		// precedence when both fail.
		deleteErr := i.Delete(doc.ID())
		closeErr := indexReader.Close()
		if deleteErr != nil {
			return deleteErr
		}
		return closeErr
	}

	return indexReader, closeFn, nil
}
// TestPointPolygonContains runs "contains" point queries against polygon
// documents. Each case indexes one polygon, runs the query in a subtest, and
// removes the document again.
func TestPointPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       rightRectPoint,
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "point inside polygon",
			QueryType:        "contains",
		},
		{
			QueryShape:       leftRectPoint,
			DocShapeVertices: nil,
			DocShapeName:     "",
			Expected:         nil,
			Desc:             "empty polygon",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon", [][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType, false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v", test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringPolygonContains runs "contains" linestring queries against
// polygon documents. Each case indexes one polygon, runs the query in a
// subtest, and removes the document again.
func TestLinestringPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{1, 2}, {3, 5}},
			DocShapeVertices: [][][]float64{{{1, 2}, {3, 5}, {2, 7}, {1, 2}}},
			DocShapeName:     "polygon1",
			Desc:             "linestring coinciding with edge of the polygon",
			Expected:         []string{"polygon1"},
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{1, 0}, {0, 1}},
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Desc:             "diagonal of a square",
			Expected:         []string{"polygon1"},
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{0.2, 0.2}, {0.8, 0.8}},
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Desc:             "linestring within polygon",
			Expected:         []string{"polygon1"},
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopePointContains runs a "contains" envelope query against a point
// document. Per the case comment, no match is expected because a point
// cannot contain an envelope.
func TestEnvelopePointContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: []float64{0.5, 0.5},
			DocShapeName:     "point1",
			Desc:             "point completely within bounded rectangle",
			Expected:         nil, // will always be nil since point can't contain envelope
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopeLinestringContains runs a "contains" envelope query against a
// linestring document. Per the case comment, no match is expected because a
// linestring cannot contain an envelope.
func TestEnvelopeLinestringContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{0.5, 0.5}, {10, 10}},
			DocShapeName:     "linestring1",
			Desc:             "linestring partially within bounded rectangle",
			Expected:         nil, // will always be nil since linestring can't contain envelope
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopePolygonContains runs "contains" envelope queries against
// polygon documents: a polygon inside the envelope, one outside it, and one
// coincident with it. Only the coincident polygon is expected to match.
func TestEnvelopePolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{0.5, 0.5}, {1, 0.5}, {1, 1}, {0.5, 1}, {0.5, 0.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon completely within bounded rectangle",
			Expected:         nil,
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{10.5, 10.5}, {11.5, 10.5}, {11.5, 11.5}, {10.5, 11.5}, {10.5, 10.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon completely outside bounded rectangle",
			Expected:         nil,
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Desc:             "polygon coincident with bounded rectangle",
			Expected:         []string{"polygon1"},
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonPointContains runs "contains" polygon queries against point
// documents. No case expects a match; per the case comment a point is a
// non-closed shape.
func TestPolygonPointContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       rightRect,
			DocShapeVertices: rightRectPoint,
			DocShapeName:     "point1",
			Expected:         nil, // nil since point is a non-closed shape
			Desc:             "point inside polygon",
			QueryType:        "contains",
		},
		{
			QueryShape:       leftRect,
			DocShapeVertices: leftRectEdgePoint,
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point on edge of polygon",
			QueryType:        "contains",
		},
		{
			QueryShape:       leftRectWithHole,
			DocShapeVertices: leftRectPoint,
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point in polygon's hole",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonLinestringContains runs a "contains" polygon query against a
// linestring document. No match is expected; per the case comment a
// linestring is a non-closed shape.
func TestPolygonLinestringContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       rightRect,
			DocShapeVertices: [][]float64{{0, 1}, {1, 0}},
			DocShapeName:     "linestring1",
			Expected:         nil, // nil since linestring is a non-closed shape
			Desc:             "diagonal of a square",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonEnvelopeContains runs "contains" polygon queries against an
// envelope document: one query polygon touching the envelope's edges (no
// match expected) and one strictly inside it (match expected).
func TestPolygonEnvelopeContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][]float64{{{0.5, 0.5}, {1, 0.5}, {1, 1}, {0.5, 1}, {0.5, 0.5}}},
			DocShapeVertices: [][]float64{{0, 1}, {1, 0}},
			DocShapeName:     "envelope1",
			Expected:         nil,
			Desc:             "polygon contained inside envelope with edge overlaps", // this fails since
			// contains doesn't include edges or vertices
			QueryType: "contains",
		},
		{
			QueryShape:       [][][]float64{{{0.25, 0.25}, {0.5, 0.25}, {0.5, 0.5}, {0.25, 0.25}, {0.25, 0.25}}},
			DocShapeVertices: [][]float64{{0, 1}, {1, 0}},
			DocShapeName:     "envelope1",
			Expected:         []string{"envelope1"},
			Desc:             "polygon contained completely inside envelope",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "envelope",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPointPolygonContains runs "contains" multipoint queries against
// polygon documents, including polygons with a hole and a point lying on a
// polygon edge.
func TestMultiPointPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       leftRectEdgeMultiPoint,
			DocShapeVertices: leftRectWithHole,
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "multi point inside polygon with hole",
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{1, 0.5}},
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "multi point on polygon edge",
			QueryType:        "contains",
		},
		{
			QueryShape: [][]float64{{0.3, 0.3}, {0.5, 0.5}},
			DocShapeVertices: [][][]float64{
				{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}},
				{{0.2, 0.2}, {0.4, 0.2}, {0.4, 0.4}, {0.2, 0.4}, {0.2, 0.2}},
			},
			DocShapeName: "polygon1",
			Expected:     nil, // returns nil since one of the points is within the hole
			Desc:         "multi point inside polygon and hole",
			QueryType:    "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				true, indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipoint: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPointLinestringContains runs "contains" multipoint queries
// against a linestring document: a match is expected when all query points
// coincide with the linestring's end points, and none when only some do.
func TestMultiPointLinestringContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       leftRectEdgeMultiPoint,
			DocShapeVertices: [][]float64{{-1, 0.2}, {-0.9, 0.1}},
			DocShapeName:     "linestring1",
			Expected:         []string{"linestring1"},
			Desc:             "multi point overlaps with all linestring end points",
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{-1, 0.2}, {-0.9, 0.1}, {0.5, 0.5}},
			DocShapeVertices: [][]float64{{-1, 0.2}, {-0.9, 0.1}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "multi point overlaps with some linestring end points",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				true, indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipoint: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPointContains runs "contains" multipoint queries against a
// multipoint document: a match is expected when every query point coincides
// with a document point, and none when only some do.
func TestMultiPointContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       leftRectEdgeMultiPoint,
			DocShapeVertices: [][]float64{{-1, 0.2}, {-0.9, 0.1}},
			DocShapeName:     "multipoint1",
			Expected:         []string{"multipoint1"},
			Desc:             "multi point overlaps with all multi points",
			QueryType:        "contains",
		},
		{
			QueryShape:       [][]float64{{-1, 0.2}, {-0.9, 0.1}, {0.5, 0.5}},
			DocShapeVertices: [][]float64{{-1, 0.2}, {-0.9, 0.1}},
			DocShapeName:     "multipoint1",
			Expected:         nil,
			Desc:             "multi point overlaps with some multi points",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				true, indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipoint: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonContains runs "contains" polygon queries against polygon
// documents: polygons that merely share an edge are expected not to match,
// coincident polygons are expected to match.
func TestPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       leftRect,
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "polygons sharing an edge",
			QueryType:        "contains",
		},
		{
			QueryShape:       rightRect,
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "coincident polygons",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonMultiPointContains runs "contains" polygon queries against
// multipoint documents. No case expects a match; per the case comment a
// multipoint is a non-closed shape.
func TestPolygonMultiPointContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       leftRect,
			DocShapeVertices: leftRectEdgeMultiPoint,
			DocShapeName:     "multipoint1",
			Expected:         nil, // nil since multipoint is a non-closed shape
			Desc:             "multiple points on polygon edge",
			QueryType:        "contains",
		},
		{
			QueryShape:       leftRect,
			DocShapeVertices: leftRectMultiPoint,
			DocShapeName:     "multipoint1",
			Expected:         nil,
			Desc:             "multiple points, both outside and inside polygon",
			QueryType:        "contains",
		},
		{
			QueryShape:       leftRectWithHole,
			DocShapeVertices: leftRectMultiPoint,
			DocShapeName:     "multipoint1",
			Expected:         nil,
			Desc:             "multiple points in polygon hole",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonPolygonContains runs "contains" multipolygon queries
// against a polygon document: a coincident query polygon is expected to
// match, a query polygon larger than the document polygon is not.
func TestMultiPolygonPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][][]float64{leftRect},
			DocShapeVertices: leftRect,
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "coincident polygons",
			QueryType:        "contains",
		},
		{
			QueryShape:       [][][][]float64{{{{2, 2}, {-2, 2}, {-2, -2}, {2, -2}}}},
			DocShapeVertices: leftRect,
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "polygon larger than polygons in query shape",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiLinestringMultiPolygonContains runs "contains" multilinestring
// queries against a multipolygon document: linestrings lying on the polygon
// edge are expected not to match, linestrings inside it are.
func TestMultiLinestringMultiPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][]float64{{{0.2, 1}, {0.8, 1}}, {{1, 0.2}, {1, 0.8}}},
			DocShapeVertices: [][][][]float64{rightRect},
			DocShapeName:     "multipolygon1",
			Expected:         nil, // contains doesn't include edges or vertices
			Desc:             "linestrings on edge of polygon",
			QueryType:        "contains",
		},
		{
			QueryShape:       [][][]float64{{{0.2, 0.2}, {0.8, 0.8}}, {{0.8, 0.2}, {0.2, 0.8}}},
			DocShapeVertices: [][][][]float64{{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}}},
			DocShapeName:     "multipolygon1",
			Expected:         []string{"multipolygon1"},
			Desc:             "linestrings within polygon",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipolygon", test.DocShapeVertices, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiLinestringQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multilinestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionPolygonContains runs a "contains" geometry
// collection query (a single linestring) against a polygon document and
// expects the polygon to match.
func TestGeometryCollectionPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][][]float64
		QueryShapeTypes  []string
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][][][]float64{{{{{0, 1}, {1, 0}}}}},
			QueryShapeTypes:  []string{"linestring"},
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "linestring on edge of polygon",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeGeometryCollectionRelationQuery(test.QueryType,
				indexReader, test.QueryShape, test.QueryShapeTypes, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionMultiPolygonContains runs "contains" geometry
// collection queries (a single point, then a single polygon) against a
// multipolygon document built from rightRect and leftRect; both cases expect
// the document to match.
func TestGeometryCollectionMultiPolygonContains(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][][]float64
		QueryShapeTypes  []string
		DocShapeVertices [][][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][][][]float64{{{{{1, 1}}}}},
			QueryShapeTypes:  []string{"point"},
			DocShapeVertices: [][][][]float64{rightRect, leftRect},
			DocShapeName:     "multipolygon1",
			Expected:         []string{"multipolygon1"},
			Desc:             "point on vertex of one of the polygons",
			QueryType:        "contains",
		},
		{
			// WIP - Adding a point (-0.5,-0.5)
			QueryShape:       [][][][][]float64{{{{{0.2, 0.4}, {0.2, 0.2}, {0.4, 0.2}, {0.4, 0.4}}}}},
			QueryShapeTypes:  []string{"polygon"},
			DocShapeVertices: [][][][]float64{rightRect, leftRect},
			DocShapeName:     "multipolygon1",
			Expected:         []string{"multipolygon1"},
			Desc:             "polygon contained completely within multipolygons",
			QueryType:        "contains",
		},
	}

	i := setupIndex(t)

	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipolygon", test.DocShapeVertices, i)
		if err != nil {
			// On a setup error both indexReader and closeFn are nil;
			// continuing would panic, so stop here.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeGeometryCollectionRelationQuery(test.QueryType,
				indexReader, test.QueryShape, test.QueryShapeTypes, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
================================================
FILE: search/searcher/geoshape_intersects_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
index "github.com/blevesearch/bleve_index_api"
)
// setupIndex creates a scorch index using gtreap.Name, an empty "path"
// setting and an analysis queue built with NewAnalysisQueue(1), opens it and
// returns it. A failure to create or open the index aborts the calling test
// through t.Fatal.
func setupIndex(t *testing.T) index.Index {
	config := map[string]interface{}{
		"path": "",
	}

	idx, err := scorch.NewScorch(gtreap.Name, config, index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}

	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}
	return idx
}
// TestPointIntersects indexes a point document for each case and runs an
// "intersects" point query against it: coincident points are expected to
// match, non coincident points are not.
func TestPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       []float64{2.0, 2.0},
			DocShapeVertices: []float64{2.0, 2.0},
			DocShapeName:     "point1",
			Desc:             "coincident points",
			Expected:         []string{"point1"},
		},
		{
			QueryShape:       []float64{2.0, 2.0},
			DocShapeVertices: []float64{2.0, 2.1},
			DocShapeName:     "point2",
			Desc:             "non coincident points",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		// indexing and searching independently for each case.
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointMultiPointIntersects indexes a multipoint document for each case
// and runs an "intersects" point query against it: the document is expected
// to match only when the query point coincides with one of its points.
func TestPointMultiPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       []float64{2.0, 2.0},
			DocShapeVertices: [][]float64{{2.0, 2.0}, {3.0, 2.0}},
			DocShapeName:     "point1",
			Desc:             "point coincides with one point in multipoint",
			Expected:         []string{"point1"},
		},
		{
			QueryShape:       []float64{2.0, 2.0},
			DocShapeVertices: [][]float64{{2.0, 2.1}, {3.0, 3.1}},
			DocShapeName:     "point2",
			Desc:             "non coincident points",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		// The document vertices describe several points, so the document is
		// indexed as a "multipoint" (same nesting as TestMultiPointIntersects)
		// rather than as a single "point".
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		// indexing and searching independently for each case.
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointLinestringIntersects indexes a linestring document for each case
// and runs an "intersects" point query against it. Per the expectations
// below, only a point matching a vertex of the linestring is reported as
// intersecting.
func TestPointLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       []float64{4.0, 4.0},
			DocShapeVertices: [][]float64{{2.0, 2.0}, {3.0, 3.0}, {4.0, 4.0}},
			DocShapeName:     "linestring1",
			Desc:             "point at the vertex of linestring",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       []float64{1.5, 1.5001714},
			DocShapeVertices: [][]float64{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeName:     "linestring1",
			Desc:             "point along linestring",
			Expected:         nil, // nil since point is said to intersect only when it matches any
			// of the endpoints of the linestring
		},
		{
			QueryShape:       []float64{1.5, 1.6001714},
			DocShapeVertices: [][]float64{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeName:     "linestring1",
			Desc:             "point outside linestring",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointMultiLinestringIntersects indexes a multilinestring document for
// each case and runs an "intersects" point query against it. Per the
// expectations below, only a point matching a vertex of one of the
// linestrings is reported as intersecting.
func TestPointMultiLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       []float64{3.0, 3.0},
			DocShapeVertices: [][][]float64{{{2.0, 2.0}, {3.0, 3.0}, {4.0, 4.0}}},
			DocShapeName:     "linestring1",
			Desc:             "point at the vertex of linestring",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       []float64{1.5, 1.5001714},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}}},
			DocShapeName:     "linestring1",
			Desc:             "point along a linestring",
			Expected:         nil, // nil since point is said to intersect only when it matches any
			// of the endpoints of any of the linestrings
		},
		{
			QueryShape:       []float64{1.5, 1.6001714},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}}, {{1, 1.1}, {2, 2.1}, {3, 3.4}}},
			DocShapeName:     "linestring1",
			Desc:             "point outside all linestrings",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointPolygonIntersects indexes a polygon document for each case and
// runs an "intersects" point query against it, covering a point on a vertex,
// on an edge, inside the polygon, and inside a hole of the polygon (the only
// case expected not to match).
func TestPointPolygonIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       []float64{3.0, 3.0},
			DocShapeVertices: [][][]float64{{{2.0, 2.0}, {3.0, 3.0}, {1.0, 3.0}, {2.0, 2.0}}},
			DocShapeName:     "polygon1",
			Desc:             "point on polygon vertex",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape:       []float64{1.5, 1.500714},
			DocShapeVertices: [][][]float64{{{1.0, 1.0}, {2.0, 2.0}, {0.0, 2.0}, {1.0, 1.0}}},
			DocShapeName:     "polygon1",
			Desc:             "point on polygon edge",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape:       []float64{1.5, 1.9},
			DocShapeVertices: [][][]float64{{{1.0, 1.0}, {2.0, 2.0}, {0.0, 2.0}, {1.0, 1.0}}},
			DocShapeName:     "polygon1",
			Desc:             "point inside polygon",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape: []float64{0.3, 0.3},
			DocShapeVertices: [][][]float64{
				{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}},
				{{0.2, 0.2}, {0.2, 0.4}, {0.4, 0.4}, {0.4, 0.2}, {0.2, 0.2}},
			},
			DocShapeName: "polygon1",
			Desc:         "point inside hole inside polygon",
			Expected:     nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointMultiPolygonIntersects indexes a multipolygon document for each
// case and runs an "intersects" point query against it, covering a point on a
// vertex, on an edge, inside one polygon while on a vertex of another, and
// inside a hole (the only case expected not to match).
func TestPointMultiPolygonIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       []float64{3.0, 3.0},
			DocShapeVertices: [][][][]float64{{{{2.0, 2.0}, {3.0, 3.0}, {1.0, 3.0}, {2.0, 2.0}}}},
			DocShapeName:     "multipolygon1",
			Desc:             "point on a polygon vertex",
			Expected:         []string{"multipolygon1"},
		},
		{
			QueryShape:       []float64{1.5, 1.500714},
			DocShapeVertices: [][][][]float64{{{{1.0, 1.0}, {2.0, 2.0}, {0.0, 2.0}, {1.0, 1.0}}}},
			DocShapeName:     "multipolygon1",
			Desc:             "point on polygon edge",
			Expected:         []string{"multipolygon1"},
		},
		{
			QueryShape: []float64{1.5, 1.9},
			DocShapeVertices: [][][][]float64{
				{{{1.0, 1.0}, {2.0, 2.0}, {0.0, 2.0}, {1.0, 1.0}}},
				{{{1.5, 1.9}, {2.5, 2.9}, {0.5, 2.9}, {1.5, 1.9}}},
			},
			DocShapeName: "multipolygon1",
			Desc:         "point inside a polygon and on vertex of another polygon",
			Expected:     []string{"multipolygon1"},
		},
		{
			QueryShape: []float64{0.3, 0.3},
			DocShapeVertices: [][][][]float64{{
				{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}},
				{{0.2, 0.2}, {0.2, 0.4}, {0.4, 0.4}, {0.4, 0.2}, {0.2, 0.2}},
			}},
			DocShapeName: "multipolygon1",
			Desc:         "point inside hole inside one of the polygons",
			Expected:     nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipolygon", test.DocShapeVertices, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopePointIntersects indexes a point document for each case and runs
// an envelope query against it using the relation named by the case's
// QueryType, covering a point on a vertex of the bounded rectangle and a
// point outside it.
func TestEnvelopePointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: rightRectPoint,
			DocShapeName:     "point1",
			Desc:             "point on vertex of bounded rectangle",
			Expected:         []string{"point1"},
			QueryType:        "intersects",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: []float64{10, 10},
			DocShapeName:     "point1",
			Desc:             "point outside bounded rectangle",
			Expected:         nil,
			QueryType:        "intersects",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopeLinestringIntersect indexes a linestring document for each case
// and runs an envelope query against it using the relation named by the
// case's QueryType, covering linestrings completely inside, completely
// outside and partially inside the bounded rectangle.
func TestEnvelopeLinestringIntersect(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{0.25, 0.25}, {0.5, 0.5}},
			DocShapeName:     "linestring1",
			Desc:             "linestring completely in bounded rectangle",
			Expected:         []string{"linestring1"},
			QueryType:        "intersects",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{2.5, 2.5}, {4.5, 4.5}},
			DocShapeName:     "linestring1",
			Desc:             "linestring outside bounded rectangle",
			Expected:         nil,
			QueryType:        "intersects",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{0.25, 0.25}, {4.5, 4.5}},
			DocShapeName:     "linestring1",
			Desc:             "linestring partially in bounded rectangle",
			Expected:         []string{"linestring1"},
			QueryType:        "intersects",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopePolygonIntersect indexes a polygon document for each case and
// runs an envelope query against it using the relation named by the case's
// QueryType, covering a polygon that intersects the bounded rectangle and one
// completely outside it.
func TestEnvelopePolygonIntersect(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{0.5, 0.5}, {1.5, 0.5}, {1.5, 1.5}, {0.5, 1.5}, {0.5, 0.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon intersects bounded rectangle",
			Expected:         []string{"polygon1"},
			QueryType:        "intersects",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{10.5, 10.5}, {11.5, 10.5}, {11.5, 11.5}, {10.5, 11.5}, {10.5, 10.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon completely outside bounded rectangle",
			Expected:         nil,
			QueryType:        "intersects",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPointIntersects indexes a multipoint document and runs an
// "intersects" multipoint query against it; the document is expected to match
// when one of the query points coincides with it.
func TestMultiPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][]float64{{3.0, 3.0}, {4.0, 4.0}},
			DocShapeVertices: [][]float64{{4.0, 4.0}},
			DocShapeName:     "multipoint1",
			Desc:             "single coincident multipoint",
			Expected:         []string{"multipoint1"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery("intersects",
				true, indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipoint: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringIntersects indexes a linestring document for each case and
// runs an "intersects" linestring query against it, covering coincident
// lines, lines touching at their ends, overlapping sub-lines, crossing lines
// and lines that do not intersect.
func TestLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][]float64{{3.0, 2.0}, {4.0, 2.0}},
			DocShapeVertices: [][]float64{{3.0, 2.0}, {4.0, 2.0}},
			DocShapeName:     "linestring1",
			Desc:             "coincident linestrings",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {1.5, 1.5}, {2.0, 2.0}},
			DocShapeVertices: [][]float64{{2.0, 2.0}, {4.0, 3.0}},
			DocShapeName:     "linestring1",
			Desc:             "linestrings intersecting at the ends",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {3.0, 3.0}},
			DocShapeVertices: [][]float64{{1.5499860, 1.5501575}, {4.0, 6.0}},
			DocShapeName:     "linestring1",
			Desc:             "subline not at vertex",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}},
			DocShapeVertices: [][]float64{{1.5499860, 1.5501575}, {1.5, 1.5001714}},
			DocShapeName:     "linestring1",
			Desc:             "subline inside linestring",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {1.5, 1.5}, {2.0, 2.0}},
			DocShapeVertices: [][]float64{{1.0, 2.0}, {2.0, 1.0}},
			DocShapeName:     "linestring1",
			Desc:             "linestrings intersecting at some edge",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {1.5, 1.5}, {2.0, 2.0}},
			DocShapeVertices: [][]float64{{1.0, 2.0}, {1.0, 4.0}},
			DocShapeName:     "linestring1",
			Desc:             "non intersecting linestrings",
			Expected:         nil,
		},
		{
			QueryShape:       [][]float64{{59.32, 0.52}, {68.99, -7.36}, {75.49, -12.21}},
			DocShapeVertices: [][]float64{{71.98, 0}, {67.58, -6.57}, {63.19, -12.72}},
			DocShapeName:     "linestring1",
			Desc:             "linestrings with more than 2 points intersecting at some edges",
			Expected:         []string{"linestring1"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				// The query shape printed here is a linestring.
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringPolygonIntersects indexes a polygon document for each case
// and runs an "intersects" linestring query against it, covering lines that
// touch a vertex, lie within the polygon, cross an edge or the whole polygon,
// miss it entirely, and interact with holes in the polygon.
func TestLinestringPolygonIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {1.5, 1.5}, {2.0, 2.0}},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeName:     "polygon1",
			Desc:             "linestring intersects polygon at a vertex",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape:       [][]float64{{0.2, 0.2}, {0.4, 0.4}},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeName:     "polygon1",
			Desc:             "linestring within polygon",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape:       [][]float64{{-0.5, 0.5}, {0.5, 0.5}},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeName:     "polygon1",
			Desc:             "linestring intersects polygon at an edge",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape:       [][]float64{{-0.5, 0.5}, {1.5, 0.5}},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeName:     "polygon1",
			Desc:             "linestring intersects polygon as a whole",
			Expected:         []string{"polygon1"},
		},
		{
			QueryShape:       [][]float64{{-0.5, 0.5}, {-1.5, -1.5}},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeName:     "polygon1",
			Desc:             "linestring does not intersect polygon",
			Expected:         nil,
		},
		{
			QueryShape: [][]float64{{0.3, 0.3}, {0.35, 0.35}},
			DocShapeVertices: [][][]float64{
				{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}},
				{{0.2, 0.2}, {0.2, 0.4}, {0.4, 0.4}, {0.4, 0.2}, {0.2, 0.2}},
			},
			DocShapeName: "polygon1",
			Desc:         "linestring does not intersect polygon when contained in the hole",
			Expected:     nil,
		},
		{
			QueryShape: [][]float64{{0.3, 0.3}, {0.5, 0.5}},
			DocShapeVertices: [][][]float64{
				{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}},
				{{0.2, 0.2}, {0.2, 0.4}, {0.4, 0.4}, {0.4, 0.2}, {0.2, 0.2}},
			},
			DocShapeName: "polygon1",
			Desc:         "linestring intersects polygon in the hole",
			Expected:     []string{"polygon1"},
		},
		{
			QueryShape: [][]float64{{0.4, 0.3}, {0.6, 0.3}},
			DocShapeVertices: [][][]float64{
				{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}},
				{{0.3, 0.3}, {0.4, 0.2}, {0.5, 0.3}, {0.4, 0.4}, {0.3, 0.3}},
				{{0.5, 0.3}, {0.6, 0.2}, {0.7, 0.3}, {0.6, 0.4}, {0.5, 0.3}},
			},
			DocShapeName: "polygon1",
			Desc:         "linestring intersects polygon through touching holes",
			Expected:     []string{"polygon1"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringPointIntersects indexes a point document for each case and
// runs an "intersects" linestring query against it. The cases use linestrings
// whose longitudes run between -179 and 180 and check points on and off the
// line, including at an end vertex.
func TestLinestringPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][]float64{{179, 0}, {-179, 0}},
			DocShapeVertices: []float64{179.1, 0},
			DocShapeName:     "point1",
			Desc:             "point across longitudinal boundary of linestring",
			Expected:         []string{"point1"},
		},
		{
			QueryShape:       [][]float64{{-179, 0}, {179, 0}},
			DocShapeVertices: []float64{179.1, 0},
			DocShapeName:     "point1",
			Desc:             "point across longitudinal boundary of reversed linestring",
			Expected:         []string{"point1"},
		},
		{
			QueryShape:       [][]float64{{179, 0}, {-179, 0}},
			DocShapeVertices: []float64{170, 0},
			DocShapeName:     "point1",
			Desc:             "point does not intersect linestring",
			Expected:         nil,
		},
		{
			QueryShape:       [][]float64{{-179, 0}, {179, 0}},
			DocShapeVertices: []float64{170, 0},
			DocShapeName:     "point1",
			Desc:             "point does not intersect reversed linestring",
			Expected:         nil,
		},
		{
			QueryShape:       [][]float64{{-179, 0}, {179, 0}, {178, 0}},
			DocShapeVertices: []float64{178, 0},
			DocShapeName:     "point1",
			Desc:             "point intersects linestring at end vertex",
			Expected:         []string{"point1"},
		},
		{
			QueryShape:       [][]float64{{-179, 0}, {179, 0}, {178, 0}, {180, 0}},
			DocShapeVertices: []float64{178, 0},
			DocShapeName:     "point1",
			Desc:             "point intersects linestring with more than two points",
			Expected:         []string{"point1"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiLinestringIntersects indexes a multilinestring document for each
// case and runs an "intersects" multilinestring query against it, covering an
// intersecting and a non-intersecting pair.
func TestMultiLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][][]float64{{{1.0, 1.0}, {1.1, 1.1}, {2.0, 2.0}, {2.1, 2.1}}},
			DocShapeVertices: [][][]float64{{{0.0, 0.5132}, {-1.1, -1.1}, {1.5, 1.512}, {2.1, 2.1}}},
			DocShapeName:     "multilinestring1",
			Desc:             "intersecting multilinestrings",
			Expected:         []string{"multilinestring1"},
		},
		{
			QueryShape:       [][][]float64{{{1.0, 1.0}, {1.1, 1.1}, {2.0, 2.0}, {2.1, 2.1}}},
			DocShapeVertices: [][][]float64{{{10.1, 100.5}, {11.5, 102.5}}},
			DocShapeName:     "multilinestring1",
			Desc:             "non-intersecting multilinestrings",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiLinestringQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multilinestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiLinestringMultiPointIntersects indexes a multipoint document for
// each case and runs an "intersects" multilinestring query against it; the
// document is expected to match only when the query intersects one of its
// points.
func TestMultiLinestringMultiPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][][]float64{{{2.0, 2.0}, {2.1, 2.1}}, {{3.0, 3.0}, {3.1, 3.1}}},
			DocShapeVertices: [][]float64{{5.0, 6.0}, {67, 67}, {3.1, 3.1}},
			DocShapeName:     "multipoint1",
			Desc:             "multilinestring intersects one of the multipoints",
			Expected:         []string{"multipoint1"},
		},
		{
			QueryShape:       [][][]float64{{{2.0, 2.0}, {2.1, 2.1}}, {{3.0, 3.0}, {3.1, 3.1}}},
			DocShapeVertices: [][]float64{{56.0, 56.0}, {66, 66}},
			DocShapeName:     "multipoint1",
			Desc:             "multilinestring does not intersect any of the multipoints",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiLinestringQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				// The query shape printed here is a multilinestring.
				t.Errorf("expected %v, got %v for multilinestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonIntersects indexes a polygon document for each case and runs an
// "intersects" polygon query against it, covering coincident, overlapping and
// nested polygons as well as a polygon lying inside the hole of the query
// polygon and a disjoint polygon (the two cases expected not to match).
func TestPolygonIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape: [][][]float64{{
				{1.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.0, 1.0},
			}},
			DocShapeVertices: [][][]float64{{
				{1.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.0, 1.0},
			}},
			DocShapeName: "polygon1",
			Desc:         "coincident polygons",
			Expected:     []string{"polygon1"},
		},
		{
			QueryShape: [][][]float64{{
				{1.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.0, 1.0},
			}},
			DocShapeVertices: [][][]float64{{
				{1.2, 1.2},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.2, 1.2},
			}},
			DocShapeName: "polygon1",
			Desc:         "polygon and a window polygon",
			Expected:     []string{"polygon1"},
		},
		{
			QueryShape: [][][]float64{{
				{1.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.0, 1.0},
			}},
			DocShapeVertices: [][][]float64{{
				{1.1, 1.1},
				{1.2, 1.1},
				{1.2, 1.2},
				{1.1, 1.2},
				{1.1, 1.1},
			}},
			DocShapeName: "polygon1",
			Desc:         "nested polygons",
			Expected:     []string{"polygon1"},
		},
		{
			QueryShape: [][][]float64{{
				{1.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.0, 1.0},
			}},
			DocShapeVertices: [][][]float64{{
				{0.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{0.0, 2.0},
				{0.0, 1.0},
			}},
			DocShapeName: "polygon1",
			Desc:         "intersecting polygons",
			Expected:     []string{"polygon1"},
		},
		{
			QueryShape: [][][]float64{{{0, 0}, {5, 0}, {5, 5}, {0, 5}, {0, 0}}, {
				{1, 4},
				{4, 4},
				{4, 1},
				{1, 1},
				{1, 4},
			}},
			DocShapeVertices: [][][]float64{{{2, 2}, {3, 2}, {3, 3}, {2, 3}, {2, 2}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon inside hole of a larger polygon",
			Expected:         nil,
		},
		{
			QueryShape: [][][]float64{{
				{1.0, 1.0},
				{2.0, 1.0},
				{2.0, 2.0},
				{1.0, 2.0},
				{1.0, 1.0},
			}},
			DocShapeVertices: [][][]float64{{
				{3.0, 3.0},
				{4.0, 3.0},
				{4.0, 4.0},
				{3.0, 4.0},
				{3.0, 3.0},
			}},
			DocShapeName: "polygon1",
			Desc:         "disjoint polygons",
			Expected:     nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonLinestringIntersects indexes a linestring document for each case
// and runs an "intersects" polygon query against it, covering a line along a
// polygon edge, a line far from the polygon, a fully enclosed line, and lines
// that cut fully or partially through the polygon.
func TestPolygonLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][][]float64{{{150, 85}, {160, 85}, {-20, 85}, {-30, 85}, {150, 85}}},
			DocShapeVertices: [][]float64{{150, 85}, {160, 85}},
			DocShapeName:     "linestring1",
			Desc:             "polygon intersects line along edge",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{150, 85}, {160, 85}},
			DocShapeName:     "linestring1",
			Desc:             "polygon not intersecting line",
			Expected:         nil,
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{0.2, 0.2}, {0.4, 0.4}},
			DocShapeName:     "linestring1",
			Desc:             "polygon completely encloses line",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{-0.5, 0.5}, {1.5, 0.5}},
			DocShapeName:     "linestring1",
			Desc:             "line cuts through entire polygon",
			Expected:         []string{"linestring1"},
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{-0.439, -0.318}, {0.4339, 0.335}},
			DocShapeName:     "linestring1",
			Desc:             "line partially cuts through polygon",
			Expected:         []string{"linestring1"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonMultiLinestringIntersects indexes a multilinestring document for
// each case and runs an "intersects" polygon query against it, covering a
// line along a polygon edge, lines far from the polygon, a fully enclosed
// line, and lines that cut fully or partially through the polygon.
func TestPolygonMultiLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][][]float64{{{150, 85}, {160, 85}, {-20, 85}, {-30, 85}, {150, 85}}},
			DocShapeVertices: [][][]float64{{{150, 85}, {160, 85}}, {{0, 1}, {5, 10}}},
			DocShapeName:     "multilinestring1",
			Desc:             "polygon intersects one line along edge",
			Expected:         []string{"multilinestring1"},
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{150, 85}, {160, 85}}},
			DocShapeName:     "multilinestring1",
			Desc:             "polygon not intersecting any line",
			Expected:         nil,
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{0.2, 0.2}, {0.4, 0.4}}},
			DocShapeName:     "multilinestring1",
			Desc:             "polygon completely encloses line",
			Expected:         []string{"multilinestring1"},
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{-0.5, 0.5}, {1.5, 0.5}}},
			DocShapeName:     "multilinestring1",
			Desc:             "line cuts through entire polygon",
			Expected:         []string{"multilinestring1"},
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{-0.439, -0.318}, {0.4339, 0.335}}},
			DocShapeName:     "multilinestring1",
			Desc:             "line partially cuts through polygon",
			Expected:         []string{"multilinestring1"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// The reader and close function cannot be relied on after a
			// failed setup, so abort instead of carrying on with them.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		// Each case is set up and torn down independently.
		if err = closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonPointIntersects indexes one point document per case and runs a
// polygon query with the "intersects" relation against the "geometry" field,
// comparing the result with the expected document names.
func TestPolygonPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][][]float64{{{150, 85}, {160, 85}, {-20, 85}, {-30, 85}, {150, 85}}},
			DocShapeVertices: []float64{150, 88},
			DocShapeName:     "point1",
			Desc:             "polygon intersects point in latitudinal boundary",
			Expected:         []string{"point1"},
		},
		{
			QueryShape:       [][][]float64{{{150, 85}, {160, 85}, {-20, 85}, {-30, 85}, {150, 85}}},
			DocShapeVertices: []float64{170, 88},
			DocShapeName:     "point1",
			Desc:             "polygon does not intersects point outside latitudinal boundary",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		// A point is wrapped up to the four-level coordinate nesting that
		// testCaseSetup takes.
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonIntersects indexes one multipolygon document per case and
// runs a multipolygon query with the "intersects" relation against the
// "geometry" field, comparing the result with the expected document names.
func TestMultiPolygonIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape: [][][][]float64{{{
				{15, 5},
				{40, 10},
				{10, 20},
				{5, 10},
				{15, 5},
			}}, {{{30, 20}, {45, 40}, {10, 40}, {30, 20}}}},
			DocShapeVertices: [][][][]float64{{{
				{0.0, 0.0},
				{1.0, 0.0},
				{1.0, 1.0},
				{0.0, 1.0},
				{0.0, 0.0},
			}, {{30, 20}, {45, 40}, {10, 40}, {30, 20}}}},
			DocShapeName: "multipolygon1",
			Desc:         "intersecting multi polygons",
			Expected:     []string{"multipolygon1"},
		},
		{
			QueryShape: [][][][]float64{{{
				{15, 5},
				{40, 10},
				{10, 20},
				{5, 10},
				{15, 5},
			}}, {{{30, 20}, {45, 40}, {10, 40}, {30, 20}}}},
			DocShapeVertices: [][][][]float64{{{
				{0.0, 0.0},
				{1.0, 0.0},
				{1.0, 1.0},
				{0.0, 1.0},
				{0.0, 0.0},
			}}},
			DocShapeName: "multipolygon1",
			Desc:         "non intersecting multi polygons",
			Expected:     nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipolygon",
			test.DocShapeVertices, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonMultiPointIntersects indexes one multipoint document per
// case and runs a multipolygon query with the "intersects" relation against
// the "geometry" field, comparing the result with the expected document names.
func TestMultiPolygonMultiPointIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape: [][][][]float64{
				{{{30, 20}, {45, 40}, {10, 40}, {30, 20}}},
				{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}},
			},
			DocShapeVertices: [][]float64{{30, 20}, {30, 30}},
			DocShapeName:     "multipoint1",
			Desc:             "multipolygon intersects multipoint at the vertex",
			Expected:         []string{"multipoint1"},
		},
		{
			QueryShape: [][][][]float64{
				{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}},
				{{{30, 20}, {45, 50}, {10, 50}, {30, 20}}},
			},
			DocShapeVertices: [][]float64{{30, -20}, {-30, 30}, {45, 66}},
			DocShapeName:     "multipoint1",
			Desc:             "multipolygon does not intersect multipoint",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonMultiLinestringIntersects indexes one multilinestring
// document per case and runs a multipolygon query with the "intersects"
// relation against the "geometry" field, comparing the result with the
// expected document names.
func TestMultiPolygonMultiLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
	}{
		{
			QueryShape:       [][][][]float64{{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}}, {{{30, 20}, {45, 40}, {10, 40}, {30, 20}}}},
			DocShapeVertices: [][][]float64{{{65, 40}, {60, 40}}, {{45, 40}, {10, 40}, {30, 20}}},
			DocShapeName:     "multilinestring1",
			Desc:             "multipolygon intersects multilinestring",
			Expected:         []string{"multilinestring1"},
		},
		{
			QueryShape:       [][][][]float64{{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}}, {{{30, 20}, {45, 40}, {10, 40}, {30, 20}}}},
			DocShapeVertices: [][][]float64{{{45, 41}, {60, 80}}, {{-45, -40}, {-10, -40}}},
			DocShapeName:     "multilinestring1",
			Desc:             "multipolygon does not intersect multilinestring",
			Expected:         nil,
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation("intersects",
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionIntersects indexes a geometry collection document per
// case and runs a geometry collection query with the "intersects" relation
// against the "geometry" field, comparing the result with the expected
// document names.
func TestGeometryCollectionIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][][]float64
		DocShapeVertices [][][][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		Types            []string
	}{
		{
			QueryShape:       [][][][][]float64{{{{}}}},
			DocShapeVertices: [][][][][]float64{{{{}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "empty geometry collections",
			Expected:         nil,
			Types:            []string{""},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetupGeometryCollection(t, test.DocShapeName, test.Types,
			test.DocShapeVertices, i)
		if err != nil {
			// The setup helper returns a nil reader and a nil close function
			// together with an error, so continuing would call a nil func.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeGeometryCollectionRelationQuery("intersects",
				indexReader, test.QueryShape, test.Types, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for geometry collection: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionPointIntersects stores a single point document for
// each case and queries it with a geometry collection using the "intersects"
// relation on the "geometry" field.
func TestGeometryCollectionPointIntersects(t *testing.T) {
	type testCase struct {
		QueryShape       [][][][][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
		Types            []string
	}

	cases := []testCase{
		{
			QueryShape:       [][][][][]float64{{{{{4, 5}}}}},
			DocShapeVertices: []float64{4, 5},
			DocShapeName:     "point1",
			Desc:             "point coincident with point in geometry collection",
			Expected:         []string{"point1"},
			Types:            []string{"point"},
		},
		{
			QueryShape:       [][][][][]float64{{{{{4, 5}, {6, 7}}}}},
			DocShapeVertices: []float64{4, 5},
			DocShapeName:     "point1",
			Desc:             "point on vertex of linestring in geometry collection",
			Expected:         []string{"point1"},
			Types:            []string{"linestring"},
		},
		{
			QueryShape:       [][][][][]float64{{{{{1, 1}, {2, 2}, {0, 2}, {1, 1}}}, {{{5, 6}}}}},
			DocShapeVertices: []float64{1.5, 1.9},
			DocShapeName:     "point1",
			Desc:             "point inside polygon in geometry collection",
			Expected:         []string{"point1"},
			Types:            []string{"polygon", "point"},
		},
	}

	idx := setupIndex(t)
	for _, tc := range cases {
		// Wrap the point up to the four-level nesting testCaseSetup takes.
		vertices := [][][][]float64{{{tc.DocShapeVertices}}}
		reader, cleanup, setupErr := testCaseSetup(t, tc.DocShapeName, "point", vertices, idx)
		if setupErr != nil {
			t.Fatal(setupErr.Error())
		}

		t.Run(tc.Desc, func(t *testing.T) {
			got, queryErr := runGeoShapeGeometryCollectionRelationQuery("intersects",
				reader, tc.QueryShape, tc.Types, "geometry")
			if queryErr != nil {
				t.Fatal(queryErr)
			}
			if reflect.DeepEqual(got, tc.Expected) {
				return
			}
			t.Errorf("expected %v, got %v for geometry collection: %+v",
				tc.Expected, got, tc.QueryShape)
		})

		if cleanupErr := cleanup(); cleanupErr != nil {
			t.Error(cleanupErr.Error())
		}
	}
}
// TestGeometryCollectionLinestringIntersects indexes one linestring document
// per case and runs a geometry collection query with the "intersects"
// relation against the "geometry" field, comparing the result with the
// expected document names.
func TestGeometryCollectionLinestringIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		Types            []string
	}{
		{
			QueryShape:       [][][][][]float64{{{{{4, 5}, {6, 7}, {7, 8}}}}},
			DocShapeVertices: [][]float64{{6, 7}, {7, 8}},
			DocShapeName:     "linestring1",
			Desc:             "linestring intersecting with linestring in geometry collection",
			Expected:         []string{"linestring1"},
			Types:            []string{"linestring"},
		},
		{
			QueryShape:       [][][][][]float64{{{{{1.5, 1.9}}}}},
			DocShapeVertices: [][]float64{{1.5, 1.9}, {2.5, 2.8}},
			DocShapeName:     "linestring1",
			Desc:             "linestring intersects point in geometry collection at vertex",
			Expected:         []string{"linestring1"},
			Types:            []string{"point"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeGeometryCollectionRelationQuery("intersects",
				indexReader, test.QueryShape, test.Types, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for geometry collection: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionPolygonIntersects indexes one polygon document per
// case and runs a geometry collection query with the "intersects" relation
// against the "geometry" field, comparing the result with the expected
// document names.
func TestGeometryCollectionPolygonIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		Types            []string
	}{
		{
			QueryShape:       [][][][][]float64{{{{{4, 5}, {6, 7}, {7, 8}, {4, 5}}}, {{{1, 2}, {2, 3}, {3, 4}, {1, 2}}}}},
			DocShapeVertices: [][][]float64{{{4, 5}, {6, 7}, {7, 8}, {4, 5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon coincides with one of the polygons in multipolygon in geometry collection",
			Expected:         []string{"polygon1"},
			Types:            []string{"multipolygon"},
		},
		{
			QueryShape:       [][][][][]float64{{{{{14, 15}}}}},
			DocShapeVertices: [][][]float64{{{4, 5}, {6, 7}, {7, 8}, {4, 5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon does not intersect point in geometry collection",
			Expected:         nil,
			Types:            []string{"point"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeGeometryCollectionRelationQuery("intersects",
				indexReader, test.QueryShape, test.Types, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for geometry collection: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointGeometryCollectionIntersects indexes a geometry collection
// document per case and runs a point query with the "intersects" relation
// against the "geometry" field, comparing the result with the expected
// document names.
func TestPointGeometryCollectionIntersects(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		Types            []string
	}{
		{
			QueryShape:       []float64{1.0, 2.0},
			DocShapeVertices: [][][][][]float64{{{{}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "geometry collection does not intersect with a point",
			Expected:         nil,
			Types:            []string{""},
		},
		{
			QueryShape:       []float64{1.0, 2.0},
			DocShapeVertices: [][][][][]float64{{{{{1.0, 2.0}}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "geometry collection intersects with a point",
			Expected:         []string{"geometrycollection1"},
			Types:            []string{"point"},
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetupGeometryCollection(t, test.DocShapeName, test.Types,
			test.DocShapeVertices, i)
		if err != nil {
			// The setup helper returns a nil reader and a nil close function
			// together with an error, so continuing would call a nil func.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			// The point helper takes a list of points; a single point query
			// is passed as a one-element list with the multipoint flag off.
			got, err := runGeoShapePointRelationQuery("intersects",
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
================================================
FILE: search/searcher/geoshape_within_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"fmt"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
index "github.com/blevesearch/bleve_index_api"
)
// Rectangle and point fixtures reused by the "within" test tables.
var (
	// leftRect is a closed single-ring rectangle spanning -1..0 on the first
	// coordinate and 0..1 on the second.
	leftRect = [][][]float64{{{-1, 0}, {0, 0}, {0, 1}, {-1, 1}, {-1, 0}}}
	// leftRectPoint is the point (-0.5, 0.5).
	leftRectPoint = []float64{-0.5, 0.5}

	// rightRect is a closed single-ring rectangle spanning 0..1 on both
	// coordinates.
	rightRect = [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}}
	// rightRectPoint is the point (0.5, 0.5).
	rightRectPoint = []float64{0.5, 0.5}
)
// testCaseSetupGeometryCollection builds a document named docShapeName with a
// geometry collection field called "geometry", writes it to i and opens a
// reader on the index.
//
// It returns the reader and a cleanup function that deletes the document and
// closes the reader. When the document or the field cannot be built it
// returns (nil, nil, err), so callers must not use the other results in that
// case. A failed index update is reported on t; a failure to open the reader
// aborts the test.
func testCaseSetupGeometryCollection(t *testing.T, docShapeName string, types []string, docShapeVertices [][][][][]float64,
	i index.Index,
) (index.IndexReader, func() error, error) {
	// Attribute failures reported here to the calling test.
	t.Helper()

	doc := document.NewDocument(docShapeName)
	if doc == nil {
		// Checked before the first use of doc rather than after it.
		return nil, nil, fmt.Errorf("the doc is nil")
	}

	gcField := document.NewGeometryCollectionFieldWithIndexingOptions("geometry",
		[]uint64{}, docShapeVertices, types, document.DefaultGeoShapeIndexingOptions)
	if gcField == nil {
		return nil, nil, fmt.Errorf("the GC field is nil")
	}
	doc.AddField(gcField)

	if err := i.Update(doc); err != nil {
		t.Error(err.Error())
	}

	indexReader, err := i.Reader()
	if err != nil {
		t.Fatal(err)
	}

	closeFn := func() error {
		// Always close the reader, even when the delete fails, so that it is
		// not leaked; the delete error takes precedence when both fail.
		deleteErr := i.Delete(doc.ID())
		closeErr := indexReader.Close()
		if deleteErr != nil {
			return deleteErr
		}
		return closeErr
	}

	return indexReader, closeFn, nil
}
// TestPointWithin indexes one point document per case and runs a point query
// with the case's relation ("within") against the "geometry" field, comparing
// the result with the expected document names.
func TestPointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices []float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       []float64{1.0, 1.0},
			DocShapeVertices: []float64{1.0, 1.0},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point contains itself",
			QueryType:        "within",
		},
		{
			QueryShape:       []float64{1.0, 1.0},
			DocShapeVertices: []float64{1.0, 1.1},
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point does not contain a different point",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPointWithin indexes one multipoint document per case and runs a
// multipoint query with the case's relation ("within") against the "geometry"
// field, comparing the result with the expected document names.
func TestMultiPointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}},
			DocShapeVertices: [][]float64{{1.0, 1.0}},
			DocShapeName:     "multipoint1",
			Expected:         []string{"multipoint1"},
			Desc:             "single multipoint common",
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}},
			DocShapeVertices: [][]float64{{1.0, 1.0}, {2.0, 2.0}},
			DocShapeName:     "multipoint1",
			Expected:         nil,
			Desc:             "multipoint not covered by multiple multipoints",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				true, indexReader, test.QueryShape, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipoint: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopePointWithin indexes one point document per case and runs an
// envelope query with the case's relation ("within") against the "geometry"
// field, comparing the result with the expected document names.
func TestEnvelopePointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: []float64{0.5, 0.5},
			DocShapeName:     "point1",
			Desc:             "point completely within bounded rectangle",
			Expected:         []string{"point1"},
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: []float64{0, 1},
			DocShapeName:     "point1",
			Desc:             "point on vertex of bounded rectangle",
			Expected:         []string{"point1"},
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: []float64{10, 11},
			DocShapeName:     "point1",
			Desc:             "point outside bounded rectangle",
			Expected:         nil,
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopeLinestringWithin indexes one linestring document per case and
// runs an envelope query with the case's relation ("within") against the
// "geometry" field, comparing the result with the expected document names.
func TestEnvelopeLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{0.5, 0.5}, {0.75, 0.75}},
			DocShapeName:     "linestring1",
			Desc:             "linestring completely within bounded rectangle",
			Expected:         []string{"linestring1"},
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{0.5, 0.5}, {1.75, 1.75}},
			DocShapeName:     "linestring1",
			Desc:             "linestring partially within bounded rectangle",
			Expected:         nil,
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][]float64{{1.5, 2.5}, {2.75, 2.75}},
			DocShapeName:     "linestring1",
			Desc:             "linestring completely outside bounded rectangle",
			Expected:         nil,
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestEnvelopePolygonWithin indexes one polygon document per case and runs an
// envelope query with the case's relation ("within") against the "geometry"
// field, comparing the result with the expected document names.
func TestEnvelopePolygonWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
	}{
		{
			// NOTE(review): no match is expected although the description
			// says the polygon is within the rectangle; the polygon shares
			// part of the envelope's boundary — confirm this is intended.
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{0.5, 0.5}, {1, 0.5}, {1, 1}, {0.5, 1}, {0.5, 0.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon completely within bounded rectangle",
			Expected:         nil,
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{0.5, 0.5}, {1.5, 0.5}, {1.5, 1.5}, {0.5, 1.5}, {0.5, 0.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon partially within bounded rectangle",
			Expected:         nil,
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{0, 1}, {1, 0}},
			DocShapeVertices: [][][]float64{{{10.5, 10.5}, {11.5, 10.5}, {11.5, 11.5}, {10.5, 11.5}, {10.5, 10.5}}},
			DocShapeName:     "polygon1",
			Desc:             "polygon completely outside bounded rectangle",
			Expected:         nil,
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeEnvelopeRelationQuery(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for Envelope: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointLinestringWithin indexes one linestring document per case and runs
// a point query with the case's relation ("within") against the "geometry"
// field; every case here expects no match.
func TestPointLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       []float64{1.0, 1.0},
			DocShapeVertices: [][]float64{{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "point does not cover different linestring",
			QueryType:        "within",
		},
		{
			QueryShape:       []float64{179.1, 0.0},
			DocShapeVertices: [][]float64{{-179.0, 0.0}, {179.0, 0.0}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "point across latitudinal boundary of linestring",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPointPolygonWithin indexes one polygon document per case and runs a
// point query with the case's relation ("within") against the "geometry"
// field; every case here expects no match.
func TestPointPolygonWithin(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       []float64{1.0, 1.0},
			DocShapeVertices: [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "point not within polygon",
			QueryType:        "within",
		},
		{ // from binary predicates file
			// NOTE(review): rightRectPoint is (0.5, 0.5), which is not one of
			// rightRect's corner coordinates, so the "vertex" wording in Desc
			// does not match the fixture — confirm.
			QueryShape:       rightRectPoint,
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			// Expected to be nil: a point query only returns a match for a
			// coincident point, even if the point is on the polygon.
			Expected:  nil,
			Desc:      "point on rectangle vertex",
			QueryType: "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringPointWithin indexes one point document per case and runs a
// linestring query with the case's relation ("within") against the "geometry"
// field, comparing the result with the expected document names.
func TestLinestringPointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeVertices: []float64{1.0, 1.0},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point at start of linestring",
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeVertices: []float64{2.0, 2.0},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point in the middle of linestring",
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeVertices: []float64{3.0, 3.0},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point at end of linestring",
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeVertices: []float64{1.5, 1.50017},
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point in between linestring",
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{1.0, 1.0}, {2.0, 2.0}, {3.0, 3.0}},
			DocShapeVertices: []float64{4, 5},
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point not contained by linestring",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPointMultiLinestringWithin indexes one multilinestring document
// per case and runs a multipoint query with the case's relation ("within")
// against the "geometry" field; every case here expects no match.
func TestMultiPointMultiLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{2, 2}, {2.1, 2.1}},
			DocShapeVertices: [][][]float64{{{1, 1}, {1.1, 1.1}}, {{2, 2}, {2.1, 2.1}}},
			DocShapeName:     "multilinestring1",
			Expected:         nil, // nil since multipoint within multiline is always nil
			Desc:             "multilinestring covering multipoint",
			QueryType:        "within",
		},
		{
			// NOTE(review): this document is indexed as a multilinestring
			// despite being named "multipoint1"; the name is never asserted
			// because no match is expected.
			QueryShape:       [][]float64{{2, 2}, {1, 1}, {3, 3}},
			DocShapeVertices: [][][]float64{{{1, 1}, {1.1, 1.1}}, {{2, 2}, {2.1, 2.1}}},
			DocShapeName:     "multipoint1",
			Expected:         nil,
			Desc:             "multilinestring not covering multipoint",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				true, indexReader, test.QueryShape, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multilinestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringWithin indexes one linestring document per case and runs a
// linestring query with the case's relation ("within") against the "geometry"
// field; every case here expects no match.
func TestLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][]float64{{1, 1}, {2, 2}, {3, 3}},
			DocShapeVertices: [][]float64{{1, 1}, {2, 2}, {3, 3}, {4, 4}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "longer linestring",
			QueryType:        "within",
		},
		{
			QueryShape:       [][]float64{{1, 1}, {2, 2}, {3, 3}},
			DocShapeVertices: [][]float64{{1, 1}, {2, 2}, {3, 3}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "coincident linestrings",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestLinestringGeometryCollectionWithin indexes a geometry collection
// document per case and runs a linestring query with the case's relation
// ("within") against the "geometry" field; the single case expects no match.
func TestLinestringGeometryCollectionWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][]float64
		DocShapeVertices [][][][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
		Types            []string
	}{
		{
			QueryShape:       [][]float64{{1, 1}, {2, 2}},
			DocShapeVertices: [][][][][]float64{{{{{1, 1}}}}},
			DocShapeName:     "geometrycollection1",
			Expected:         nil, // LS is not a closed shape
			Desc:             "geometry collection with a point on vertex of linestring",
			Types:            []string{"point"},
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetupGeometryCollection(t, test.DocShapeName, test.Types,
			test.DocShapeVertices, i)
		if err != nil {
			// The setup helper returns a nil reader and a nil close function
			// together with an error, so continuing would call a nil func.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeLinestringQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for linestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonPointWithin indexes one point document per case and runs a
// polygon query with the case's relation ("within") against the "geometry"
// field, comparing the result with the expected document names.
func TestPolygonPointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices []float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: []float64{0.5, 0.5},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point within polygon",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: []float64{5.5, 5.5},
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point not within polygon",
			QueryType:        "within",
		},
		{
			// NOTE(review): the hole's coordinates are appended to the outer
			// ring instead of being given as a second ring — confirm that
			// this single-ring form is what the case intends to exercise.
			QueryShape: [][][]float64{{
				{0, 0},
				{1, 0},
				{1, 1},
				{0, 1},
				{0, 0},
				{0.2, 0.2},
				{0.2, 0.4},
				{0.4, 0.4},
				{0.4, 0.2},
				{0.2, 0.2},
			}},
			DocShapeVertices: []float64{0.3, 0.3},
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point within polygon hole",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0.0, 0.0}, {1.0, 0.0}, {1.0, 1.0}, {0.0, 1.0}, {0.0, 0.0}}},
			DocShapeVertices: []float64{1.0, 0.0},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point on polygon vertex",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{1, 1}, {2, 2}, {0, 2}, {1, 1}}},
			DocShapeVertices: []float64{1.5, 1.5001714},
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point inside polygon",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{150, 85}, {-20, -85}, {-30, 85}, {160, -85}, {150, 85}}},
			DocShapeVertices: []float64{170, 85},
			DocShapeName:     "point1",
			Expected:         nil,
			Desc:             "point outside the polygon's latitudinal boundary",
			QueryType:        "within",
		},
		{
			// from binary predicates tests
			QueryShape:       leftRect,
			DocShapeVertices: leftRectPoint,
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point in left rectangle",
			QueryType:        "within",
		},
		{
			// from binary predicates tests
			QueryShape:       rightRect,
			DocShapeVertices: rightRectPoint,
			DocShapeName:     "point1",
			Expected:         []string{"point1"},
			Desc:             "point in right rectangle",
			QueryType:        "within",
		},
	}

	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "point",
			[][][][]float64{{{test.DocShapeVertices}}}, i)
		if err != nil {
			// Do not touch the reader or close function after a failed setup.
			t.Fatal(err)
		}

		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				// Comparing results after a failed query only adds noise.
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})

		if err := closeFn(); err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonLinestringWithin runs polygon queries with the "within" relation
// against a linestring document set up through testCaseSetup and compares the
// returned document names with the expected ones.
func TestPolygonLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{0.1, 0.1}, {0.4, 0.4}},
			DocShapeName:     "linestring1",
			Expected:         []string{"linestring1"},
			Desc:             "linestring within polygon",
			QueryType:        "within",
		},
		{
			QueryShape: [][][]float64{
				{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}},
				{{0.2, 0.2}, {0.2, 0.4}, {0.4, 0.4}, {0.4, 0.2}, {0.2, 0.2}},
			},
			DocShapeVertices: [][]float64{{0.3, 0.3}, {0.55, 0.55}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "linestring intersecting with polygon hole",
			QueryType:        "within",
		},
		{
			QueryShape: [][][]float64{
				{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}},
				{{0.2, 0.2}, {0.2, 0.4}, {0.4, 0.4}, {0.4, 0.2}, {0.2, 0.2}},
			},
			DocShapeVertices: [][]float64{{0.3, 0.3}, {4.0, 4.0}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "linestring intersecting with polygon hole and outside",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{-1, -1}, {-2, -2}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "linestring outside polygon",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][]float64{{-0.5, -0.5}, {0.5, 0.5}},
			DocShapeName:     "linestring1",
			Expected:         nil,
			Desc:             "linestring intersecting polygon",
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "linestring",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestPolygonWithin runs polygon queries with the "within" relation against a
// polygon document set up through testCaseSetup and compares the returned
// document names with the expected ones.
func TestPolygonWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "coincident polygon",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{0.2, 0.2}, {1, 0}, {1, 1}, {0, 1}, {0.2, 0.2}}},
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "polygon covers an intersecting window of itself",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{0.1, 0.1}, {0.2, 0.1}, {0.2, 0.2}, {0.1, 0.2}, {0.1, 0.1}}},
			DocShapeName:     "polygon1",
			Expected:         []string{"polygon1"},
			Desc:             "polygon covers a nested version of itself",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{-1, 0}, {1, 0}, {1, 1}, {-1, 1}, {-1, 0}}},
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "intersecting polygons",
			QueryType:        "within",
		},
		{
			QueryShape:       [][][]float64{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
			DocShapeVertices: [][][]float64{{{3, 3}, {4, 3}, {4, 4}, {3, 4}, {3, 3}}},
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "polygon totally out of range",
			QueryType:        "within",
		},
		{
			QueryShape:       leftRect,
			DocShapeVertices: rightRect,
			DocShapeName:     "polygon1",
			Expected:         nil,
			Desc:             "left and right polygons,sharing an edge",
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "polygon",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapePolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for polygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonMultiPointWithin runs multipolygon queries with the "within"
// relation against a multipoint document set up through testCaseSetup and
// compares the returned document names with the expected ones.
func TestMultiPolygonMultiPointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape: [][][][]float64{
				{{{30, 25}, {45, 40}, {10, 40}, {30, 20}, {30, 25}}},
				{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}},
			},
			DocShapeVertices: [][]float64{{30, 20}, {15, 5}},
			DocShapeName:     "multipoint1",
			Expected:         []string{"multipoint1"},
			Desc:             "multipolygon covers multipoint",
			QueryType:        "within",
		},
		{
			QueryShape: [][][][]float64{
				{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}},
				{{{30, 20}, {45, 40}, {10, 40}, {30, 20}}},
			},
			DocShapeVertices: [][]float64{{30, 20}, {30, 30}, {45, 66}},
			DocShapeName:     "multipoint1",
			Expected:         nil,
			Desc:             "multipolygon does not cover multipoint",
			QueryType:        "within",
		},
		{
			QueryShape: [][][][]float64{
				{{{0, 0}, {1, 0}, {1, 1}, {0, 1}, {0, 0}}},
				{{{1, 0}, {2, 0}, {2, 1}, {1, 1}, {1, 0}}},
			},
			DocShapeVertices: [][]float64{{0.5, 0.5}, {1.5, 0.5}},
			DocShapeName:     "multipoint1",
			Expected:         []string{"multipoint1"},
			Desc:             "multiple multipolygons required to cover multipoint",
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipoint",
			[][][][]float64{{test.DocShapeVertices}}, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				// The query shape here is a multipolygon, so say so in the
				// failure message.
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiLinestringWithin runs multilinestring queries with the relation
// named by each case against a multilinestring document set up through
// testCaseSetup and compares the returned document names with the expected
// ones.
func TestMultiLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape:       [][][]float64{{{1, 2}, {2, 3}, {3, 4}}, {{5, 6}, {6.5, 7.8}}},
			DocShapeVertices: [][][]float64{{{1, 2}, {2, 3}, {3, 4}}},
			DocShapeName:     "multilinestring1",
			Expected:         nil,
			Desc:             "multilinestrings with common linestrings",
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			// Use the relation declared by the test case instead of a
			// duplicated literal, like the sibling tests do.
			got, err := runGeoShapeMultiLinestringQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multilinestring: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonMultiLinestringWithin runs multipolygon queries with the
// "within" relation against a multilinestring document set up through
// testCaseSetup and compares the returned document names with the expected
// ones.
func TestMultiPolygonMultiLinestringWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape: [][][][]float64{
				{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}},
				{{{30, 20}, {45, 40}, {10, 40}, {30, 20}}},
			},
			DocShapeVertices: [][][]float64{{{45, 40}, {10, 40}}, {{45, 40}, {10, 40}, {30, 20}}},
			DocShapeName:     "multilinestring1",
			Expected:         []string{"multilinestring1"},
			Desc:             "multilinestring intersecting at the edge of multipolygon",
			QueryType:        "within",
		},
		{
			QueryShape: [][][][]float64{
				{{{15, 5}, {40, 10}, {10, 20}, {5, 10}, {15, 5}}},
				{{{30, 20}, {45, 40}, {10, 40}, {30, 20}}},
			},
			DocShapeVertices: [][][]float64{{{48, 40}, {8, 40}}, {{48, 40}, {8, 40}, {30, 12}}},
			DocShapeName:     "multilinestring1",
			Expected:         nil,
			Desc:             "multipolygon does not cover multilinestring",
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multilinestring",
			[][][][]float64{test.DocShapeVertices}, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestMultiPolygonWithin runs multipolygon queries with the "within" relation
// against a multipolygon document set up through testCaseSetup and compares
// the returned document names with the expected ones.
func TestMultiPolygonWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][]float64
		DocShapeVertices [][][][]float64
		DocShapeName     string
		Expected         []string
		Desc             string
		QueryType        string
	}{
		{
			QueryShape: [][][][]float64{
				{{{16, 6}, {41, 11}, {11, 21}, {6, 11}, {16, 6}}},
				{{{31, 21}, {46, 41}, {11, 41}, {31, 21}}},
			},
			DocShapeVertices: [][][][]float64{{{{31, 21}, {46, 41}, {11, 41}, {31, 21}}}},
			DocShapeName:     "multipolygon1",
			Expected:         []string{"multipolygon1"},
			Desc:             "multipolygon covers another multipolygon",
			QueryType:        "within",
		},
		{
			QueryShape: [][][][]float64{
				{{{16, 6}, {41, 11}, {11, 21}, {6, 11}, {16, 6}}},
				{{{31, 21}, {46, 41}, {11, 41}, {31, 21}}},
			},
			DocShapeVertices: [][][][]float64{{{{31, 21}, {46, 41}, {16, 46}, {31, 21}}}},
			DocShapeName:     "multipolygon1",
			Expected:         nil,
			Desc:             "multipolygon does not cover multipolygon",
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetup(t, test.DocShapeName, "multipolygon",
			test.DocShapeVertices, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeMultiPolygonQueryWithRelation(test.QueryType,
				indexReader, test.QueryShape, "geometry")
			if err != nil {
				t.Error(err.Error())
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for multipolygon: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionWithin runs geometry collection queries with the
// "within" relation against a geometry collection document set up through
// testCaseSetupGeometryCollection and compares the returned document names
// with the expected ones.
func TestGeometryCollectionWithin(t *testing.T) {
	tests := []struct {
		QueryShape       [][][][][]float64
		DocShapeVertices [][][][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		QueryType        string
		QueryShapeTypes  []string
		DocShapeTypes    []string
	}{
		{
			QueryShape:       [][][][][]float64{{{{}}}},
			DocShapeVertices: [][][][][]float64{{{{}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "empty geometry collections",
			Expected:         nil,
			QueryType:        "within",
			QueryShapeTypes:  []string{""},
			DocShapeTypes:    []string{""},
		},
		{
			QueryShape:       [][][][][]float64{{{{{1, 2}, {2, 3}}}}},
			DocShapeVertices: [][][][][]float64{{{{{1, 2}}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "geometry collection with a linestring",
			Expected:         []string{"geometrycollection1"},
			QueryShapeTypes:  []string{"linestring"},
			DocShapeTypes:    []string{"point"},
			QueryType:        "within",
		},
		{
			QueryShape:       [][][][][]float64{{{{{1, 2}, {2, 3}, {5, 6}}}}},
			DocShapeVertices: [][][][][]float64{{{{{1, 2}}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "geometry collections with common points and multipoints",
			Expected:         []string{"geometrycollection1"},
			QueryShapeTypes:  []string{"multipoint"},
			DocShapeTypes:    []string{"point"},
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetupGeometryCollection(t, test.DocShapeName, test.DocShapeTypes,
			test.DocShapeVertices, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			got, err := runGeoShapeGeometryCollectionRelationQuery(test.QueryType,
				indexReader, test.QueryShape, test.QueryShapeTypes, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for geometry collection: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
// TestGeometryCollectionPointWithin runs point queries with the relation named
// by each case ("within") against a geometry collection document set up
// through testCaseSetupGeometryCollection and compares the returned document
// names with the expected ones.
func TestGeometryCollectionPointWithin(t *testing.T) {
	tests := []struct {
		QueryShape       []float64
		DocShapeVertices [][][][][]float64
		DocShapeName     string
		Desc             string
		Expected         []string
		Types            []string
		QueryType        string
	}{
		{
			QueryShape:       []float64{1.0, 2.0},
			DocShapeVertices: [][][][][]float64{{{{}}}},
			DocShapeName:     "geometrycollection1",
			Desc:             "empty geometry collection not within a point",
			Expected:         nil,
			Types:            []string{""},
			QueryType:        "within",
		},
	}
	i := setupIndex(t)
	for _, test := range tests {
		indexReader, closeFn, err := testCaseSetupGeometryCollection(t, test.DocShapeName, test.Types,
			test.DocShapeVertices, i)
		if err != nil {
			// Stop here: continuing would use a reader and close function
			// that may be nil after a failed setup.
			t.Fatal(err)
		}
		t.Run(test.Desc, func(t *testing.T) {
			// Query with the relation declared by the test case. A literal
			// "intersects" here would leave the "within" relation that this
			// test is named for unexercised.
			got, err := runGeoShapePointRelationQuery(test.QueryType,
				false, indexReader, [][]float64{test.QueryShape}, "geometry")
			if err != nil {
				t.Fatal(err)
			}
			if !reflect.DeepEqual(got, test.Expected) {
				t.Errorf("expected %v, got %v for point: %+v",
					test.Expected, got, test.QueryShape)
			}
		})
		err = closeFn()
		if err != nil {
			t.Error(err.Error())
		}
	}
}
================================================
FILE: search/searcher/optimize_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package searcher
import (
"context"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// optimizeKNN threads a shared optimization context through every searcher in
// qsearchers that implements index.VectorOptimizable and then finishes that
// context. Searchers of any other kind are ignored. It returns the first error
// reported by VectorOptimize, or the result of Finish; when no searcher is
// vector-optimizable it returns nil.
func optimizeKNN(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher) error {
	var (
		optCtx index.VectorOptimizableContext
		err    error
	)

	for _, qs := range qsearchers {
		// Anything that is not vector-optimizable is left untouched.
		if knn, isKNN := qs.(index.VectorOptimizable); isKNN {
			if optCtx, err = knn.VectorOptimize(ctx, optCtx); err != nil {
				return err
			}
		}
	}

	if optCtx != nil {
		// Postings lists and iterators replaced in the pointer to the
		// vector reader.
		return optCtx.Finish()
	}

	// Nothing was collected, so there is nothing to finish.
	return nil
}
================================================
FILE: search/searcher/optimize_no_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !vectors
// +build !vectors
package searcher
import (
"context"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// optimizeKNN is the variant compiled when the "vectors" build tag is absent.
// It ignores all of its arguments and always returns nil.
func optimizeKNN(ctx context.Context, indexReader index.IndexReader,
qsearchers []search.Searcher) error {
// No-op
return nil
}
================================================
FILE: search/searcher/ordered_searchers_list.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"github.com/blevesearch/bleve/v2/search"
)
// OrderedSearcherList is a slice of searchers that implements sort.Interface,
// ordering the searchers by ascending Count().
type OrderedSearcherList []search.Searcher

// Len reports how many searchers the list holds (sort.Interface).
func (l OrderedSearcherList) Len() int {
	return len(l)
}

// Less reports whether the searcher at i has a smaller Count() than the one
// at j (sort.Interface).
func (l OrderedSearcherList) Less(i, j int) bool {
	left := l[i].Count()
	right := l[j].Count()
	return left < right
}

// Swap exchanges the searchers at positions i and j (sort.Interface).
func (l OrderedSearcherList) Swap(i, j int) {
	l[j], l[i] = l[i], l[j]
}
// OrderedPositionalSearcherList pairs a slice of searchers with a parallel
// slice of ints and implements sort.Interface. Sorting orders the searchers by
// ascending Count() and applies the same swaps to index, so each int stays
// attached to its searcher.
type OrderedPositionalSearcherList struct {
	searchers []search.Searcher
	index     []int
}

// Len reports how many searchers the list holds (sort.Interface).
func (l OrderedPositionalSearcherList) Len() int {
	return len(l.searchers)
}

// Less reports whether the searcher at i has a smaller Count() than the one
// at j (sort.Interface).
func (l OrderedPositionalSearcherList) Less(i, j int) bool {
	left := l.searchers[i].Count()
	right := l.searchers[j].Count()
	return left < right
}

// Swap exchanges positions i and j in both the searchers slice and the
// parallel index slice (sort.Interface).
func (l OrderedPositionalSearcherList) Swap(i, j int) {
	ss, idx := l.searchers, l.index
	ss[i], ss[j] = ss[j], ss[i]
	idx[i], idx[j] = idx[j], idx[i]
}
================================================
FILE: search/searcher/search_boolean.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeBooleanSearcher caches the size of the BooleanSearcher
// struct as reported by reflection; it is filled in once by init.
var reflectStaticSizeBooleanSearcher int

func init() {
	structSize := reflect.TypeOf(BooleanSearcher{}).Size()
	reflectStaticSizeBooleanSearcher = int(structSize)
}
// BooleanSearcher combines up to three child searchers: one whose matches are
// required (must), one whose matches are optional or add to the score
// (should), and one whose matches are excluded (must not). Any of the three
// may be nil.
type BooleanSearcher struct {
// indexReader is the reader handed to NewBooleanSearcher.
indexReader index.IndexReader
// Child searchers; each may be nil.
mustSearcher search.Searcher
shouldSearcher search.Searcher
mustNotSearcher search.Searcher
// queryNorm is computed by computeQueryNorm from the must/should weights.
queryNorm float64
// currMust, currShould and currMustNot hold the most recent match fetched
// from the corresponding child searcher (nil when none).
currMust *search.DocumentMatch
currShould *search.DocumentMatch
currMustNot *search.DocumentMatch
// currentID is the ID of the current candidate document: it follows currMust
// when mustSearcher is set, otherwise currShould, and is nil when there is no
// candidate.
currentID index.IndexInternalID
// NOTE(review): min is not referenced by the methods visible in this file.
min uint64
// scorer combines the constituent matches into the returned match.
scorer *scorer.ConjunctionQueryScorer
// matches is a reusable two-element scratch slice for the scorer's
// constituents.
matches []*search.DocumentMatch
// initialized is set once initSearchers has completed successfully.
initialized bool
// done is set once Next has run out of matches.
done bool
}
// NewBooleanSearcher builds a BooleanSearcher over the given must, should and
// must-not searchers (any of which may be nil), creates its conjunction scorer
// from options, and computes the query norm before returning. The returned
// error is always nil.
func NewBooleanSearcher(ctx context.Context, indexReader index.IndexReader, mustSearcher search.Searcher, shouldSearcher search.Searcher, mustNotSearcher search.Searcher, options search.SearcherOptions) (*BooleanSearcher, error) {
	bs := &BooleanSearcher{
		indexReader:     indexReader,
		mustSearcher:    mustSearcher,
		shouldSearcher:  shouldSearcher,
		mustNotSearcher: mustNotSearcher,
		scorer:          scorer.NewConjunctionQueryScorer(options),
		// Scratch space for at most two constituents (must + should).
		matches: make([]*search.DocumentMatch, 2),
	}
	bs.computeQueryNorm()
	return bs, nil
}
// Size returns the sum of the struct's static size, one pointer, the sizes
// reported by each non-nil child searcher, the scorer's size, and the sizes of
// any non-nil entries in the matches scratch slice.
func (s *BooleanSearcher) Size() int {
	total := reflectStaticSizeBooleanSearcher + size.SizeOfPtr

	children := [...]search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher}
	for _, child := range children {
		if child != nil {
			total += child.Size()
		}
	}

	total += s.scorer.Size()

	for _, m := range s.matches {
		if m == nil {
			continue
		}
		total += m.Size()
	}

	return total
}
// computeQueryNorm sums the Weight() of the must and should searchers (when
// present), stores 1/sqrt(sum) in s.queryNorm, and passes that norm to the
// same searchers. The must-not searcher takes no part in this.
func (s *BooleanSearcher) computeQueryNorm() {
	scoring := [...]search.Searcher{s.mustSearcher, s.shouldSearcher}

	// Accumulate the weights of the scoring searchers.
	var weightSum float64
	for _, child := range scoring {
		if child != nil {
			weightSum += child.Weight()
		}
	}

	// Derive the norm from the accumulated weight.
	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// Tell the downstream searchers about the norm.
	for _, child := range scoring {
		if child != nil {
			child.SetQueryNorm(s.queryNorm)
		}
	}
}
// initSearchers positions every non-nil child searcher on its first match, in
// the order must, should, must-not, returning any previously held match to the
// pool first. It then derives currentID from the must match (or from the
// should match when there is no must searcher) and marks the searcher as
// initialized. On error it returns immediately without setting initialized.
func (s *BooleanSearcher) initSearchers(ctx *search.SearchContext) error {
	// prime releases the match currently stored in *curr (if any) and replaces
	// it with the child's next match; a nil child is skipped.
	prime := func(child search.Searcher, curr **search.DocumentMatch) error {
		if child == nil {
			return nil
		}
		if *curr != nil {
			ctx.DocumentMatchPool.Put(*curr)
		}
		var err error
		*curr, err = child.Next(ctx)
		return err
	}

	if err := prime(s.mustSearcher, &s.currMust); err != nil {
		return err
	}
	if err := prime(s.shouldSearcher, &s.currShould); err != nil {
		return err
	}
	if err := prime(s.mustNotSearcher, &s.currMustNot); err != nil {
		return err
	}

	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}

	s.initialized = true
	return nil
}
// advanceNextMust moves the searcher that drives the candidate (the must
// searcher, or the should searcher when there is no must searcher) to its next
// match and updates currentID from it. The previously held match is returned
// to the pool unless it is the same pointer as skipReturn.
func (s *BooleanSearcher) advanceNextMust(ctx *search.SearchContext, skipReturn *search.DocumentMatch) error {
	if s.mustSearcher == nil {
		// No must searcher: the should searcher drives the candidate.
		if skipReturn != s.currShould {
			ctx.DocumentMatchPool.Put(s.currShould)
		}
		next, err := s.shouldSearcher.Next(ctx)
		s.currShould = next
		if err != nil {
			return err
		}
	} else {
		if skipReturn != s.currMust {
			ctx.DocumentMatchPool.Put(s.currMust)
		}
		next, err := s.mustSearcher.Next(ctx)
		s.currMust = next
		if err != nil {
			return err
		}
	}

	switch {
	case s.mustSearcher != nil && s.currMust != nil:
		s.currentID = s.currMust.IndexInternalID
	case s.mustSearcher == nil && s.currShould != nil:
		s.currentID = s.currShould.IndexInternalID
	default:
		s.currentID = nil
	}
	return nil
}
// Weight returns the sum of the Weight() values of the must and should
// searchers, skipping whichever is nil.
func (s *BooleanSearcher) Weight() float64 {
	var total float64
	for _, child := range [...]search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			total += child.Weight()
		}
	}
	return total
}
// SetQueryNorm forwards qnorm to the must and should searchers, skipping
// whichever is nil.
func (s *BooleanSearcher) SetQueryNorm(qnorm float64) {
	for _, child := range [...]search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			child.SetQueryNorm(qnorm)
		}
	}
}
// Next returns the next document match that satisfies the boolean
// combination, or nil once there are no more (after which done is set and all
// further calls return nil). The child searchers are initialized lazily on the
// first call.
func (s *BooleanSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
// Walk candidates (tracked by currentID) until one is accepted or the
// driving searcher runs out.
for s.currentID != nil {
if s.currMustNot != nil {
cmp := s.currMustNot.IndexInternalID.Compare(s.currentID)
if cmp < 0 {
ctx.DocumentMatchPool.Put(s.currMustNot)
// advance must not searcher to our candidate entry
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, s.currentID)
if err != nil {
return nil, err
}
if s.currMustNot != nil && s.currMustNot.IndexInternalID.Equals(s.currentID) {
// the candidate is excluded
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
continue
}
} else if cmp == 0 {
// the candidate is excluded
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
continue
}
}
shouldCmpOrNil := 1 // NOTE: shouldCmpOrNil stays 1 when currShould == nil.
if s.currShould != nil {
shouldCmpOrNil = s.currShould.IndexInternalID.Compare(s.currentID)
}
if shouldCmpOrNil < 0 {
ctx.DocumentMatchPool.Put(s.currShould)
// advance should searcher to our candidate entry
s.currShould, err = s.shouldSearcher.Advance(ctx, s.currentID)
if err != nil {
return nil, err
}
if s.currShould != nil && s.currShould.IndexInternalID.Equals(s.currentID) {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = s.matches
cons[0] = s.currMust
cons[1] = s.currShould
} else {
cons = s.matches[0:1]
cons[0] = s.currShould
}
rv = s.scorer.Score(ctx, cons)
// pass rv so that the match being returned is not put back in the pool
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher.Min() == 0 {
// match is OK anyway
cons := s.matches[0:1]
cons[0] = s.currMust
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
}
} else if shouldCmpOrNil == 0 {
// score bonus matches should
var cons []*search.DocumentMatch
if s.currMust != nil {
cons = s.matches
cons[0] = s.currMust
cons[1] = s.currShould
} else {
cons = s.matches[0:1]
cons[0] = s.currShould
}
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
} else if s.shouldSearcher == nil || s.shouldSearcher.Min() == 0 {
// match is OK anyway
cons := s.matches[0:1]
cons[0] = s.currMust
rv = s.scorer.Score(ctx, cons)
err = s.advanceNextMust(ctx, rv)
if err != nil {
return nil, err
}
break
}
// the candidate was rejected: the should clause did not match it and
// shouldSearcher.Min() is non-zero, so move on to the next candidate
err = s.advanceNextMust(ctx, nil)
if err != nil {
return nil, err
}
}
if rv == nil {
s.done = true
}
return rv, nil
}
// Advance moves the child searchers forward to ID when the current candidate
// is behind it (or there is no candidate), recomputes currentID, and then
// returns whatever Next yields from that position. It returns nil once the
// searcher is done. The child searchers are initialized lazily on first use.
func (s *BooleanSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if s.done {
return nil, nil
}
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// Advance the searcher only if the cursor is trailing the lookup ID
if s.currentID == nil || s.currentID.Compare(ID) < 0 {
var err error
if s.mustSearcher != nil {
// release the held match before replacing it
if s.currMust != nil {
ctx.DocumentMatchPool.Put(s.currMust)
}
s.currMust, err = s.mustSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
if s.shouldSearcher != nil {
if s.currShould != nil {
ctx.DocumentMatchPool.Put(s.currShould)
}
s.currShould, err = s.shouldSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
if s.mustNotSearcher != nil {
// Additional check for mustNotSearcher, whose cursor isn't tracked by
// currentID to prevent it from moving when the searcher's tracked
// position is already ahead of or at the requested ID.
if s.currMustNot == nil || s.currMustNot.IndexInternalID.Compare(ID) < 0 {
if s.currMustNot != nil {
ctx.DocumentMatchPool.Put(s.currMustNot)
}
s.currMustNot, err = s.mustNotSearcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
}
// the candidate follows the must match when a must searcher exists,
// otherwise the should match; nil means there is no candidate
if s.mustSearcher != nil && s.currMust != nil {
s.currentID = s.currMust.IndexInternalID
} else if s.mustSearcher == nil && s.currShould != nil {
s.currentID = s.currShould.IndexInternalID
} else {
s.currentID = nil
}
}
return s.Next(ctx)
}
// Count returns a worst-case estimate: the sum of the Count() values of the
// must and should searchers, skipping whichever is nil.
func (s *BooleanSearcher) Count() uint64 {
	var total uint64
	for _, child := range [...]search.Searcher{s.mustSearcher, s.shouldSearcher} {
		if child != nil {
			total += child.Count()
		}
	}
	return total
}
// Close closes every non-nil child searcher in the order must, should,
// must-not. All of them are closed even if one fails; the error returned is
// the first failure in that order, or nil when none failed.
func (s *BooleanSearcher) Close() error {
	var firstErr error
	for _, child := range [...]search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher} {
		if child == nil {
			continue
		}
		if err := child.Close(); err != nil && firstErr == nil {
			firstErr = err
		}
	}
	return firstErr
}
// Min always returns 0 for a BooleanSearcher.
func (s *BooleanSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize returns 3 plus the DocumentMatchPoolSize() of each
// non-nil child searcher.
func (s *BooleanSearcher) DocumentMatchPoolSize() int {
	total := 3
	for _, child := range [...]search.Searcher{s.mustSearcher, s.shouldSearcher, s.mustNotSearcher} {
		if child != nil {
			total += child.DocumentMatchPoolSize()
		}
	}
	return total
}
================================================
FILE: search/searcher/search_boolean_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
func TestBooleanSearch(t *testing.T) {
if twoDocIndex == nil {
t.Fatal("its null")
}
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
explainTrue := search.SearcherOptions{Explain: true}
// test 0
beerTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustSearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher}, explainTrue)
if err != nil {
t.Fatal(err)
}
martyTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
shouldSearcher, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
steveTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustNotSearcher, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{steveTermSearcher}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, mustSearcher, shouldSearcher, mustNotSearcher, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 1
martyTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
shouldSearcher2, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
steveTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustNotSearcher2, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{steveTermSearcher2}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher2, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, nil, shouldSearcher2, mustNotSearcher2, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 2
steveTermSearcher3, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustNotSearcher3, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{steveTermSearcher3}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher3, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, nil, nil, mustNotSearcher3, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 3
beerTermSearcher4, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustSearcher4, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher4}, explainTrue)
if err != nil {
t.Fatal(err)
}
steveTermSearcher4, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustNotSearcher4, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{steveTermSearcher4}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher4, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, mustSearcher4, nil, mustNotSearcher4, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 4
beerTermSearcher5, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustSearcher5, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher5}, explainTrue)
if err != nil {
t.Fatal(err)
}
steveTermSearcher5, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
martyTermSearcher5, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustNotSearcher5, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{steveTermSearcher5, martyTermSearcher5}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher5, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, mustSearcher5, nil, mustNotSearcher5, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 5
beerTermSearcher6, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustSearcher6, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher6}, explainTrue)
if err != nil {
t.Fatal(err)
}
martyTermSearcher6, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher6, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
shouldSearcher6, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher6, dustinTermSearcher6}, 2, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher6, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, mustSearcher6, shouldSearcher6, nil, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 6
beerTermSearcher7, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustSearcher7, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher7}, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher7, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, mustSearcher7, nil, nil, explainTrue)
if err != nil {
t.Fatal(err)
}
martyTermSearcher7, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
conjunctionSearcher7, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher7, booleanSearcher7}, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 7
beerTermSearcher8, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustSearcher8, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher8}, explainTrue)
if err != nil {
t.Fatal(err)
}
martyTermSearcher8, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher8, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
shouldSearcher8, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher8, dustinTermSearcher8}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
steveTermSearcher8, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
mustNotSearcher8, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{steveTermSearcher8}, 0, explainTrue)
if err != nil {
t.Fatal(err)
}
booleanSearcher8, err := NewBooleanSearcher(context.TODO(), twoDocIndexReader, mustSearcher8, shouldSearcher8, mustNotSearcher8, explainTrue)
if err != nil {
t.Fatal(err)
}
dustinTermSearcher8a, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
conjunctionSearcher8, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{booleanSearcher8, dustinTermSearcher8a}, explainTrue)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: booleanSearcher,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("1"),
Score: 0.9818005051949021,
},
{
IndexInternalID: index.IndexInternalID("3"),
Score: 0.808709699395535,
},
{
IndexInternalID: index.IndexInternalID("4"),
Score: 0.34618161159873423,
},
},
},
{
searcher: booleanSearcher2,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("1"),
Score: 0.6775110856165737,
},
{
IndexInternalID: index.IndexInternalID("3"),
Score: 0.6775110856165737,
},
},
},
// no MUST or SHOULD clauses yields no results
{
searcher: booleanSearcher3,
results: []*search.DocumentMatch{},
},
{
searcher: booleanSearcher4,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("1"),
Score: 1.0,
},
{
IndexInternalID: index.IndexInternalID("3"),
Score: 0.5,
},
{
IndexInternalID: index.IndexInternalID("4"),
Score: 1.0,
},
},
},
{
searcher: booleanSearcher5,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("3"),
Score: 0.5,
},
{
IndexInternalID: index.IndexInternalID("4"),
Score: 1.0,
},
},
},
{
searcher: booleanSearcher6,
results: []*search.DocumentMatch{},
},
// test a conjunction query with a nested boolean
{
searcher: conjunctionSearcher7,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("1"),
Score: 2.0097428702814377,
},
},
},
{
searcher: conjunctionSearcher8,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("3"),
Score: 2.0681575785068107,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
}
next, err := test.searcher.Next(ctx)
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
ctx.DocumentMatchPool.Put(next)
next, err = test.searcher.Next(ctx)
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}
================================================
FILE: search/searcher/search_conjunction.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeConjunctionSearcher holds the size in bytes that reflection
// reports for a ConjunctionSearcher value; it is filled in by init.
var reflectStaticSizeConjunctionSearcher int

// init measures a zero-valued ConjunctionSearcher once at package load so
// that Size does not have to reflect on every call.
func init() {
	zero := ConjunctionSearcher{}
	reflectStaticSizeConjunctionSearcher = int(reflect.TypeOf(zero).Size())
}
// ConjunctionSearcher holds the iteration state used to find the documents
// that are matched by every one of its child searchers.
type ConjunctionSearcher struct {
// indexReader is the reader handed to NewConjunctionSearcher.
indexReader index.IndexReader
// searchers are the child searchers, in the order produced by sorting
// them as an OrderedSearcherList at construction time.
searchers []search.Searcher
// queryNorm is 1/sqrt(sum of the children's Weight()), set by
// computeQueryNorm.
queryNorm float64
// currs[i] is the match searchers[i] is currently positioned on; it is
// nil before initialization or when that child returned no match.
currs []*search.DocumentMatch
// maxIDIdx is the index into currs of the entry Next currently treats
// as holding the largest document ID.
maxIDIdx int
// scorer turns the aligned entries of currs into the match returned
// by Next.
scorer *scorer.ConjunctionQueryScorer
// initialized is set once initSearchers has fetched a first match
// from every child.
initialized bool
// options are the searcher options supplied at construction.
options search.SearcherOptions
// NOTE(review): bytesRead is not read or written by the methods defined
// alongside this type in this file — confirm where it is used.
bytesRead uint64
}
// NewConjunctionSearcher builds a searcher that yields only the documents
// matched by every searcher in qsearchers.
//
// The supplied slice is copied and the copy is sorted as an
// OrderedSearcherList, so the caller's slice is not reordered. When more than
// one child is given, optimizeCompositeSearcher is consulted up to twice:
// first for the "conjunction:unadorned" form (only when scoring is "none" and
// term vectors are not requested), and then, after the query norm has been
// pushed down to the children, for the general "conjunction" form. Whenever
// one of those attempts produces a searcher or an error, that result is
// returned instead of a ConjunctionSearcher.
func NewConjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher, options search.SearcherOptions) (
	search.Searcher, error,
) {
	// work on a private, sorted copy of the downstream searchers
	ordered := make(OrderedSearcherList, len(qsearchers))
	copy(ordered, qsearchers)
	sort.Sort(ordered)

	hasMultiple := len(ordered) > 1

	// the "unadorned" optimization is attempted only when no extra
	// information such as freq-norms or term vectors is required
	if hasMultiple && options.Score == "none" && !options.IncludeTermVectors {
		optimized, err := optimizeCompositeSearcher(ctx, "conjunction:unadorned",
			indexReader, ordered, options)
		if err != nil || optimized != nil {
			return optimized, err
		}
	}

	// assemble the regular conjunction searcher and push the query norm
	// down to the children
	conj := &ConjunctionSearcher{
		indexReader: indexReader,
		options:     options,
		searchers:   ordered,
		currs:       make([]*search.DocumentMatch, len(ordered)),
		scorer:      scorer.NewConjunctionQueryScorer(options),
	}
	conj.computeQueryNorm()

	// push-down conjunction optimization, again only for more than one child
	if hasMultiple {
		optimized, err := optimizeCompositeSearcher(ctx, "conjunction",
			indexReader, ordered, options)
		if err != nil || optimized != nil {
			return optimized, err
		}
	}

	return conj, nil
}
// computeQueryNorm derives the query norm from the children and hands it to
// each of them.
//
// The norm is 1/sqrt(sum of every child's Weight()); each Weight() value is
// used as that child's contribution to the sum of squared weights.
func (s *ConjunctionSearcher) computeQueryNorm() {
	// accumulate the children's weights
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	// turn the sum into the norm
	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm to every downstream searcher
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// Size returns an estimate, in bytes, of the memory held by this searcher:
// the reflected struct size plus one pointer, the scorer, every child
// searcher, and every non-nil current match.
func (s *ConjunctionSearcher) Size() int {
	total := reflectStaticSizeConjunctionSearcher + size.SizeOfPtr + s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, match := range s.currs {
		if match == nil {
			continue
		}
		total += match.Size()
	}

	return total
}
// initSearchers positions every child searcher on its first match and marks
// the searcher as initialized.
//
// Any match already stored in currs is returned to the pool before being
// replaced. The first error from a child's Next aborts the loop and is
// returned, leaving initialized unset.
func (s *ConjunctionSearcher) initSearchers(ctx *search.SearchContext) error {
	for i := range s.searchers {
		if prev := s.currs[i]; prev != nil {
			ctx.DocumentMatchPool.Put(prev)
		}

		first, err := s.searchers[i].Next(ctx)
		s.currs[i] = first
		if err != nil {
			return err
		}
	}

	s.initialized = true
	return nil
}
// Weight returns the sum of the Weight() values of all child searchers.
func (s *ConjunctionSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. The searcher's own
// queryNorm field is left untouched.
func (s *ConjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document on which all child searchers agree, or
// (nil, nil) when no further such document exists.
//
// The children are lazily initialized on the first call. The method then
// repeatedly compares every child's current match against the one at
// maxIDIdx, advancing children that are behind and moving maxIDIdx whenever a
// child is found to be ahead, until every entry of currs carries the same ID.
// That set is scored, every child is stepped to its next match, and the
// scored match is returned. An error from any child is returned as-is.
func (s *ConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
var err error
OUTER:
// loop only while the child holding the current maximum still has a match
for s.maxIDIdx < len(s.currs) && s.currs[s.maxIDIdx] != nil {
maxID := s.currs[s.maxIDIdx].IndexInternalID
i := 0
for i < len(s.currs) {
// a child without a current match means no further conjunction
// match is possible
if s.currs[i] == nil {
return nil, nil
}
// the max entry trivially equals itself
if i == s.maxIDIdx {
i++
continue
}
cmp := maxID.Compare(s.currs[i].IndexInternalID)
if cmp == 0 {
// this child already sits on maxID
i++
continue
}
if cmp < 0 {
// maxID < currs[i], so we found a new maxIDIdx
s.maxIDIdx = i
// advance the positions where [0 <= x < i], since we
// know they were equal to the former max entry
maxID = s.currs[s.maxIDIdx].IndexInternalID
for x := 0; x < i; x++ {
err = s.advanceChild(ctx, x, maxID)
if err != nil {
return nil, err
}
}
// restart the comparison against the new maximum
continue OUTER
}
// maxID > currs[i], so need to advance searchers[i]
err = s.advanceChild(ctx, i, maxID)
if err != nil {
return nil, err
}
// don't bump i, so that we'll examine the just-advanced
// currs[i] again
}
// if we get here, a doc matched all readers, so score and add it
rv = s.scorer.Score(ctx, s.currs)
// we know all the searchers are pointing at the same thing
// so they all need to be bumped
for i, searcher := range s.searchers {
// every current match except the one being returned goes back to the pool
if s.currs[i] != rv {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return nil, err
}
}
// don't continue now, wait for the next call to Next()
break
}
return rv, nil
}
// Advance moves every child that is positioned before ID (or has no current
// match) forward to ID, and then returns the result of Next.
//
// The children are lazily initialized first. Any error from initialization
// or from advancing a child is returned with a nil match.
func (s *ConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	for i := 0; i < len(s.searchers); i++ {
		curr := s.currs[i]
		needsAdvance := curr == nil || curr.IndexInternalID.Compare(ID) < 0
		if !needsAdvance {
			// already at or beyond the requested ID
			continue
		}
		if err := s.advanceChild(ctx, i, ID); err != nil {
			return nil, err
		}
	}

	return s.Next(ctx)
}
// advanceChild advances child searcher i to ID and stores the resulting
// match in currs[i]. The match previously held there, if any, is returned to
// the pool first. The error from the child's Advance is passed through.
func (s *ConjunctionSearcher) advanceChild(ctx *search.SearchContext, i int, ID index.IndexInternalID) (err error) {
	if stale := s.currs[i]; stale != nil {
		ctx.DocumentMatchPool.Put(stale)
	}

	moved, err := s.searchers[i].Advance(ctx, ID)
	s.currs[i] = moved
	return err
}
// Count returns the sum of the children's Count() values. This is a
// worst-case figure rather than the number of conjunction matches.
func (s *ConjunctionSearcher) Count() uint64 {
	total := uint64(0)
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher. All children are closed even when one
// fails; the first error encountered is the one returned.
func (s *ConjunctionSearcher) Close() (rv error) {
	for i := range s.searchers {
		if err := s.searchers[i].Close(); err != nil && rv == nil {
			rv = err
		}
	}
	return rv
}
// Min always reports 0 for a conjunction searcher.
func (s *ConjunctionSearcher) Min() int {
	return 0
}
// DocumentMatchPoolSize returns the number of pooled matches this searcher
// asks for: one per entry of currs plus whatever each child reports.
func (s *ConjunctionSearcher) DocumentMatchPoolSize() int {
	needed := len(s.currs)
	for _, child := range s.searchers {
		needed += child.DocumentMatchPoolSize()
	}
	return needed
}
================================================
FILE: search/searcher/search_conjunction_nested.go
================================================
// Copyright (c) 2026 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"math"
"reflect"
"slices"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeNestedConjunctionSearcher holds the size in bytes that
// reflection reports for a NestedConjunctionSearcher value; set by init.
var reflectStaticSizeNestedConjunctionSearcher int

// init measures a zero-valued NestedConjunctionSearcher once at package load
// so that Size does not have to reflect on every call.
func init() {
	zero := NestedConjunctionSearcher{}
	reflectStaticSizeNestedConjunctionSearcher = int(reflect.TypeOf(zero).Size())
}
// NestedConjunctionSearcher holds the iteration state used to join the
// matches of several child searchers on a shared ancestor document.
type NestedConjunctionSearcher struct {
// nestedReader resolves the ancestry chain of a document ID.
nestedReader index.NestedReader
// searchers are the child searchers being joined, in the order given
// to the constructor.
searchers []search.Searcher
// queryNorm is 1/sqrt(sum of the children's Weight()), set by
// computeQueryNorm.
queryNorm float64
// currs[i] is the match searchers[i] is currently positioned on; nil
// when that child has returned no match.
currs []*search.DocumentMatch
// currAncestors[i] is the ancestry chain of currs[i]; each slice is
// truncated and refilled in place whenever currs[i] changes.
currAncestors [][]index.AncestorID
// currKeys[i] is the entry of currAncestors[i] found joinIdx positions
// from the root; children are aligned by comparing these keys.
currKeys []index.AncestorID
// initialized is set once initialize has positioned every child on a
// first match.
initialized bool
// joinIdx is the position, counted from the root of an ancestry chain,
// of the ancestor used as the join key. initialize lowers it when a
// chain is too short to contain that position.
joinIdx int
// options are the searcher options supplied at construction.
options search.SearcherOptions
// docQueue buffers the matches collected for one aligned key until
// they are handed out by Next or Advance.
docQueue *CoalesceQueue
// reusable ID buffer for Advance() calls
advanceID index.IndexInternalID
// reusable ancestry-chain buffer for Advance() calls
ancestors []index.AncestorID
}
// NewNestedConjunctionSearcher builds a conjunction searcher that joins its
// children on the ancestor found joinIdx positions from the root.
//
// indexReader must also implement index.NestedReader; otherwise an error is
// returned. The searchers slice is stored as given (not copied), and the
// query norm derived from the children is pushed down to them before the
// searcher is returned.
func NewNestedConjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
	searchers []search.Searcher, joinIdx int, options search.SearcherOptions) (search.Searcher, error) {
	nested, supported := indexReader.(index.NestedReader)
	if !supported {
		return nil, fmt.Errorf("indexReader does not support nested documents")
	}

	// per-child state slices are all sized to the number of children
	childCount := len(searchers)
	ncs := &NestedConjunctionSearcher{
		nestedReader:  nested,
		options:       options,
		searchers:     searchers,
		currs:         make([]*search.DocumentMatch, childCount),
		currAncestors: make([][]index.AncestorID, childCount),
		currKeys:      make([]index.AncestorID, childCount),
		joinIdx:       joinIdx,
		docQueue:      NewCoalesceQueue(),
	}
	ncs.computeQueryNorm()

	return ncs, nil
}
// computeQueryNorm derives the query norm from the children and hands it to
// each of them.
//
// The norm is 1/sqrt(sum of every child's Weight()); each Weight() value is
// used as that child's contribution to the sum of squared weights.
func (s *NestedConjunctionSearcher) computeQueryNorm() {
	// accumulate the children's weights
	var weightSum float64
	for i := range s.searchers {
		weightSum += s.searchers[i].Weight()
	}

	// turn the sum into the norm
	s.queryNorm = 1.0 / math.Sqrt(weightSum)

	// propagate the norm to every downstream searcher
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// Size returns an estimate, in bytes, of the memory held by this searcher:
// the reflected struct size plus one pointer, every child searcher, and
// every non-nil current match.
func (s *NestedConjunctionSearcher) Size() int {
	total := reflectStaticSizeNestedConjunctionSearcher + size.SizeOfPtr

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, match := range s.currs {
		if match == nil {
			continue
		}
		total += match.Size()
	}

	return total
}
// Weight returns the sum of the Weight() values of all child searchers.
func (s *NestedConjunctionSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher. The searcher's own
// queryNorm field is left untouched.
func (s *NestedConjunctionSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Count returns the sum of the children's Count() values. This is a
// worst-case figure rather than the number of joined matches.
func (s *NestedConjunctionSearcher) Count() uint64 {
	total := uint64(0)
	for i := range s.searchers {
		total += s.searchers[i].Count()
	}
	return total
}
// Close closes every child searcher. All children are closed even when one
// fails; the first error encountered is the one returned.
func (s *NestedConjunctionSearcher) Close() (rv error) {
	for i := range s.searchers {
		if err := s.searchers[i].Close(); err != nil && rv == nil {
			rv = err
		}
	}
	return rv
}
// Min always reports 0 for a nested conjunction searcher.
func (s *NestedConjunctionSearcher) Min() int {
	return 0
}
// DocumentMatchPoolSize returns the number of pooled matches this searcher
// asks for: one per entry of currs plus whatever each child reports.
func (s *NestedConjunctionSearcher) DocumentMatchPoolSize() int {
	needed := len(s.currs)
	for _, child := range s.searchers {
		needed += child.DocumentMatchPoolSize()
	}
	return needed
}
// initialize positions every child searcher on its first match and records
// that match's ancestry chain and join key.
//
// It returns (true, nil) as soon as a child yields no match, in which case
// the searcher is not marked initialized. It returns (false, err) when a
// child or the nested reader fails. On success it returns (false, nil) with
// initialized set.
func (s *NestedConjunctionSearcher) initialize(ctx *search.SearchContext) (bool, error) {
	for i := range s.searchers {
		// give back any match left over from an earlier attempt
		if prev := s.currs[i]; prev != nil {
			ctx.DocumentMatchPool.Put(prev)
		}

		var err error
		s.currs[i], err = s.searchers[i].Next(ctx)
		if err != nil {
			return false, err
		}
		if s.currs[i] == nil {
			// this child has nothing to offer, so the join is empty
			return true, nil
		}

		// load the ancestry chain for this match, reusing the old buffer
		s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
		if err != nil {
			return false, err
		}

		// Fallback: when a chain is too short to contain position joinIdx,
		// lower joinIdx to the deepest position that chain does have.
		// Ideally the query construction would already have picked the
		// covering depth across all sub-queries.
		if depth := len(s.currAncestors[i]); s.joinIdx >= depth {
			s.joinIdx = depth - 1
		}
	}

	// keys are derived only now because joinIdx may have been lowered above
	for i := 0; i < len(s.searchers); i++ {
		s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
	}

	s.initialized = true
	return false, nil
}
// Next returns the next match of the nested conjunction, or (nil, nil) when
// there are no more.
//
// Matches buffered by an earlier call are handed out first. Otherwise the
// child searchers are aligned on a common key (each child's ancestor at
// joinIdx from the root): the largest current key is chosen, every child
// that is behind it is advanced to it, and this repeats until all keys
// agree. Once aligned, every match of every child that carries that key is
// buffered in docQueue, the queue is finalized, and the first buffered match
// is returned. If any child runs out of matches while aligning, the search
// is over. Errors from children or from the nested reader are returned
// as-is.
//
// A searcher constructed with no children returns (nil, nil) rather than
// indexing into the empty currs slice.
func (s *NestedConjunctionSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	// initialize on first call to Next, by getting first match
	// from each searcher and their ancestry chains
	if !s.initialized {
		done, err := s.initialize(ctx)
		if err != nil {
			return nil, err
		}
		if done {
			return nil, nil
		}
	}
	// check if the docQueue has any buffered matches
	if s.docQueue.Len() > 0 {
		return s.docQueue.Dequeue(ctx), nil
	}
	// now enter the main alignment loop
	n := len(s.searchers)
	if n == 0 {
		// nothing to join; also keeps s.currs[0] below from panicking
		return nil, nil
	}
OUTER:
	for {
		// pick the highest key (ancestor at joinIdx level) among the
		// searchers' current matches; it becomes the alignment target
		if s.currs[0] == nil {
			return nil, nil
		}
		maxKey := s.currKeys[0]
		for i := 1; i < n; i++ {
			// currs[i] is nil means one of the searchers is exhausted
			if s.currs[i] == nil {
				return nil, nil
			}
			currKey := s.currKeys[i]
			if maxKey.Compare(currKey) < 0 {
				maxKey = currKey
			}
		}
		// maxKey converted to an ID for Advance(); filled in lazily, and
		// at most once per pass, only if some searcher needs advancing
		var advanceID index.IndexInternalID
		// flag to track if all searchers are aligned
		aligned := true
		// now try to align all searchers to maxKey: a searcher whose key
		// is behind maxKey is advanced to it, while a searcher that is
		// already at maxKey is left where it is
		for i := 0; i < n; i++ {
			cmp := s.currKeys[i].Compare(maxKey)
			if cmp < 0 {
				// not aligned, so advance this searcher to maxKey
				// convert maxKey to advanceID only once
				if advanceID == nil {
					advanceID = s.toAdvanceID(maxKey)
				}
				var err error
				ctx.DocumentMatchPool.Put(s.currs[i])
				s.currs[i], err = s.searchers[i].Advance(ctx, advanceID)
				if err != nil {
					return nil, err
				}
				if s.currs[i] == nil {
					// one of the searchers is exhausted, so we are done
					return nil, nil
				}
				// recalc ancestors
				s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
				if err != nil {
					return nil, err
				}
				// recalc key
				s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
				// recalc cmp
				cmp = s.currKeys[i].Compare(maxKey)
			}
			if cmp != 0 {
				// not aligned (the searcher landed beyond maxKey)
				aligned = false
			}
		}
		// now check if all the searchers are aligned at the same maxKey
		// if they are not aligned, we need to restart the loop of picking
		// the highest key
		if !aligned {
			continue OUTER
		}
		// if we are here, all the searchers are aligned at maxKey
		// now we need to buffer all the intermediate matches for every
		// searcher at this key, until either the searcher's key changes
		// or the searcher is exhausted
		var err error
		for i := 0; i < n; i++ {
			for {
				// buffer the current match
				s.docQueue.Enqueue(s.currs[i])
				// advance to next match
				s.currs[i], err = s.searchers[i].Next(ctx)
				if err != nil {
					return nil, err
				}
				if s.currs[i] == nil {
					// searcher exhausted, break out
					break
				}
				// recalc ancestors
				s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
				if err != nil {
					return nil, err
				}
				// recalc key
				s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
				// check if key has changed
				if !s.currKeys[i].Equals(maxKey) {
					// key changed, break out
					break
				}
			}
		}
		// finalize the docQueue for dequeueing
		s.docQueue.Finalize()
		// finally return the first buffered match
		return s.docQueue.Dequeue(ctx), nil
	}
}
// ancestorFromRoot returns the entry of ancestors that lies pos positions
// away from the root. The root is the last element of the slice, so pos 0
// yields the root, pos 1 the element just before it, and so on. The caller
// must keep pos within the chain; out-of-range positions panic.
func ancestorFromRoot(ancestors []index.AncestorID, pos int) index.AncestorID {
	rootIdx := len(ancestors) - 1
	return ancestors[rootIdx-pos]
}
// toAdvanceID converts key into an IndexInternalID written into the
// searcher's reusable advanceID buffer, and returns that buffer. Each call
// overwrites the result of the previous one, so the returned ID is only
// meant to be passed straight to Advance(), which is expected not to retain
// a reference to it.
func (s *NestedConjunctionSearcher) toAdvanceID(key index.AncestorID) index.IndexInternalID {
	// truncate to zero length so the existing capacity is reused
	s.advanceID = key.ToIndexInternalID(s.advanceID[:0])
	return s.advanceID
}
// Advance returns the first match whose ID is at or beyond ID, or (nil, nil)
// when there is none.
//
// The searcher is lazily initialized first. Matches still buffered in
// docQueue are consulted before anything else; buffered matches below ID are
// returned to the pool. After that every child that is behind is advanced to
// a target derived from ID's ancestry chain, and Next is called repeatedly
// until it yields a match at or beyond ID. Errors from children or from the
// nested reader are returned as-is.
func (s *NestedConjunctionSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
done, err := s.initialize(ctx)
if err != nil {
return nil, err
}
if done {
return nil, nil
}
}
// first check if the docQueue has any buffered matches
// if so we first check if any of them can satisfy the Advance(ID)
for s.docQueue.Len() > 0 {
dm := s.docQueue.Dequeue(ctx)
if dm.IndexInternalID.Compare(ID) >= 0 {
return dm, nil
}
// otherwise recycle this match
ctx.DocumentMatchPool.Put(dm)
}
var err error
// now we first get the ancestry chain for the given ID
s.ancestors, err = s.nestedReader.Ancestors(ID, s.ancestors[:0])
if err != nil {
return nil, err
}
// we now follow the following logic for each searcher:
// let S be the length of the ancestry chain of the searcher's current match
// let I be the length of the ancestry chain for the given ID
// 1. if S > I:
// then we just Advance() the searcher to the given ID if required
// 2. else if S <= I:
// then we get the AncestorID at position (S - 1) from the root of
// the given ID's ancestry chain, and Advance() the searcher to
// it if required
for i, searcher := range s.searchers {
if s.currs[i] == nil {
return nil, nil // already exhausted, nothing to do
}
var targetID index.IndexInternalID
S := len(s.currAncestors[i])
I := len(s.ancestors)
if S > I {
// case 1: S > I
targetID = ID
} else {
// case 2: S <= I
targetID = s.toAdvanceID(ancestorFromRoot(s.ancestors, S-1))
}
// only move forward; a searcher already at or past targetID stays put
if s.currs[i].IndexInternalID.Compare(targetID) < 0 {
// need to advance this searcher
ctx.DocumentMatchPool.Put(s.currs[i])
s.currs[i], err = searcher.Advance(ctx, targetID)
if err != nil {
return nil, err
}
if s.currs[i] == nil {
// one of the searchers is exhausted, so we are done
return nil, nil
}
// recalc ancestors
s.currAncestors[i], err = s.nestedReader.Ancestors(s.currs[i].IndexInternalID, s.currAncestors[i][:0])
if err != nil {
return nil, err
}
// recalc key
s.currKeys[i] = ancestorFromRoot(s.currAncestors[i], s.joinIdx)
}
}
// we need to call Next() in a loop until we reach or exceed the given ID
// the Next() call basically gives us a match that is aligned correctly, but
// if joinIdx < I, we can have multiple matches for the same joinIdx ancestor
// and they may be < ID, so we need to loop
for {
next, err := s.Next(ctx)
if err != nil {
return nil, err
}
if next == nil {
return nil, nil
}
if next.IndexInternalID.Compare(ID) >= 0 {
return next, nil
}
// this match is still before ID; recycle it and keep looking
ctx.DocumentMatchPool.Put(next)
}
}
// ------------------------------------------------------------------------------------------
// CoalesceQueue buffers DocumentMatch values and hands them back in sorted
// order, merging entries that share an IndexInternalID. Items are added with
// Enqueue, ordered with Finalize, and removed with Dequeue.
type CoalesceQueue struct {
// order holds the buffered matches; after Finalize it is sorted in
// descending ID order so that Dequeue can pop from the end.
order []*search.DocumentMatch // queue of DocumentMatch
}
// NewCoalesceQueue returns an empty CoalesceQueue whose backing slice is
// allocated (non-nil) with zero length.
func NewCoalesceQueue() *CoalesceQueue {
	return &CoalesceQueue{
		order: []*search.DocumentMatch{},
	}
}
// Enqueue adds it to the end of the queue. No ordering or duplicate handling
// happens here: sorting is done by Finalize and duplicates are merged later,
// by Dequeue.
func (cq *CoalesceQueue) Enqueue(it *search.DocumentMatch) {
	// plain append; the slice is popped from its end once it is sorted
	cq.order = append(cq.order, it)
}
// Finalize sorts the buffered matches by IndexInternalID in descending order.
// It MUST be called after the last Enqueue and before the first Dequeue.
// Descending order places the smallest ID at the end of the slice, so that
// Dequeue can pop from the end and the slice's storage can be reused.
func (cq *CoalesceQueue) Finalize() {
	// comparing b against a (rather than a against b) yields descending order
	descendingByID := func(a, b *search.DocumentMatch) int {
		return b.IndexInternalID.Compare(a.IndexInternalID)
	}
	slices.SortFunc(cq.order, descendingByID)
}
// Dequeue pops the match at the end of the queue and returns it, or returns
// nil when the queue is empty. Any following entries with the same
// IndexInternalID are popped as well and folded into the returned match:
// their scores are added, their explanations merged, and their field term
// locations merged. Each folded entry is then returned to
// ctx.DocumentMatchPool.
func (cq *CoalesceQueue) Dequeue(ctx *search.SearchContext) *search.DocumentMatch {
	remaining := len(cq.order)
	if remaining == 0 {
		return nil
	}

	// take the last element as the result
	merged := cq.order[remaining-1]
	cq.order = cq.order[:remaining-1]

	// absorb every subsequent element that carries the same ID
	for remaining = len(cq.order); remaining > 0; remaining = len(cq.order) {
		dup := cq.order[remaining-1]
		if !merged.IndexInternalID.Equals(dup.IndexInternalID) {
			// a different document follows; leave it for the next call
			break
		}
		cq.order = cq.order[:remaining-1]

		merged.Score += dup.Score
		merged.Expl = merged.Expl.MergeWith(dup.Expl)
		merged.FieldTermLocations = search.MergeFieldTermLocationsFromMatch(
			merged.FieldTermLocations, dup)

		// the duplicate is no longer needed
		ctx.DocumentMatchPool.Put(dup)
	}

	return merged
}
// Len reports how many DocumentMatch items are currently buffered, counting
// duplicates that Dequeue has not merged yet.
func (cq *CoalesceQueue) Len() int {
	buffered := cq.order
	return len(buffered)
}
================================================
FILE: search/searcher/search_conjunction_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"os"
"strings"
"testing"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
func TestConjunctionSearch(t *testing.T) {
twoDocIndexReader, err := twoDocIndex.Reader()
if err != nil {
t.Error(err)
}
defer func() {
err := twoDocIndexReader.Close()
if err != nil {
t.Fatal(err)
}
}()
explainTrue := search.SearcherOptions{Explain: true}
// test 0
beerTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
martyTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
beerAndMartySearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher, martyTermSearcher}, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 1
angstTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "angst", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
beerTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
angstAndBeerSearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{angstTermSearcher, beerTermSearcher2}, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 2
beerTermSearcher3, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
jackTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "jack", "name", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
beerAndJackSearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher3, jackTermSearcher}, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 3
beerTermSearcher4, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
misterTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "mister", "title", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
beerAndMisterSearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher4, misterTermSearcher}, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 4
couchbaseTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "couchbase", "street", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
misterTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "mister", "title", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
couchbaseAndMisterSearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{couchbaseTermSearcher, misterTermSearcher2}, explainTrue)
if err != nil {
t.Fatal(err)
}
// test 5
beerTermSearcher5, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "beer", "desc", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
couchbaseTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "couchbase", "street", 1.0, explainTrue)
if err != nil {
t.Fatal(err)
}
misterTermSearcher3, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "mister", "title", 5.0, explainTrue)
if err != nil {
t.Fatal(err)
}
couchbaseAndMisterSearcher2, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{couchbaseTermSearcher2, misterTermSearcher3}, explainTrue)
if err != nil {
t.Fatal(err)
}
beerAndCouchbaseAndMisterSearcher, err := NewConjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{beerTermSearcher5, couchbaseAndMisterSearcher2}, explainTrue)
if err != nil {
t.Fatal(err)
}
tests := []struct {
searcher search.Searcher
results []*search.DocumentMatch
}{
{
searcher: beerAndMartySearcher,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("1"),
Score: 2.0097428702814377,
},
},
},
{
searcher: angstAndBeerSearcher,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("2"),
Score: 1.0807601687084403,
},
},
},
{
searcher: beerAndJackSearcher,
results: []*search.DocumentMatch{},
},
{
searcher: beerAndMisterSearcher,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("2"),
Score: 1.2877980334016337,
},
{
IndexInternalID: index.IndexInternalID("3"),
Score: 1.2877980334016337,
},
},
},
{
searcher: couchbaseAndMisterSearcher,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("2"),
Score: 1.4436599157093672,
},
},
},
{
searcher: beerAndCouchbaseAndMisterSearcher,
results: []*search.DocumentMatch{
{
IndexInternalID: index.IndexInternalID("2"),
Score: 1.441614953806971,
},
},
},
}
for testIndex, test := range tests {
defer func() {
err := test.searcher.Close()
if err != nil {
t.Fatal(err)
}
}()
ctx := &search.SearchContext{
DocumentMatchPool: search.NewDocumentMatchPool(10, 0),
}
next, err := test.searcher.Next(ctx)
i := 0
for err == nil && next != nil {
if i < len(test.results) {
if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
}
if !scoresCloseEnough(next.Score, test.results[i].Score) {
t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
t.Logf("scoring explanation: %s", next.Expl)
}
}
next, err = test.searcher.Next(ctx)
i++
}
if err != nil {
t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
}
if len(test.results) != i {
t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
}
}
}
// compositeSearchOptimizationTest describes one case for the composite
// searcher optimization tests.
type compositeSearchOptimizationTest struct {
// fieldTerms lists the leaf term searchers to build, each encoded as
// "field:term".
fieldTerms []string
// expectEmpty names the composite kinds ("conjunction", "disjunction") for
// which an empty result set is acceptable; it is matched by substring.
expectEmpty string
}
// TestScorchCompositeSearchOptimizations builds a scorch index in a temporary
// directory and runs every test case through both conjunction and
// disjunction searchers, comparing the result sets across searcher options
// and across enabled/disabled optimizations.
func TestScorchCompositeSearchOptimizations(t *testing.T) {
	dir, err := os.MkdirTemp("", "scorchTwoDoc")
	if err != nil {
		// Without a directory there is no index to test against.
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(dir)
	}()

	twoDocIndex := initTwoDocScorch(dir)

	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// Fatal rather than Error: everything below, including the deferred
		// Close, needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	tests := []compositeSearchOptimizationTest{
		{
			fieldTerms:  []string{},
			expectEmpty: "conjunction,disjunction",
		},
		{
			fieldTerms:  []string{"name:marty"},
			expectEmpty: "",
		},
		{
			fieldTerms:  []string{"name:marty", "desc:beer"},
			expectEmpty: "",
		},
		{
			fieldTerms:  []string{"name:marty", "name:marty"},
			expectEmpty: "",
		},
		{
			fieldTerms:  []string{"name:marty", "desc:beer", "title:mister", "street:couchbase"},
			expectEmpty: "conjunction",
		},
		{
			fieldTerms:  []string{"name:steve", "desc:beer", "title:mister", "street:couchbase"},
			expectEmpty: "",
		},
		{
			fieldTerms:  []string{"name:NotARealName"},
			expectEmpty: "conjunction,disjunction",
		},
		{
			fieldTerms:  []string{"name:NotARealName", "name:marty"},
			expectEmpty: "conjunction",
		},
		{
			fieldTerms:  []string{"name:NotARealName", "name:marty", "desc:beer"},
			expectEmpty: "conjunction",
		},
		{
			fieldTerms:  []string{"name:NotARealName", "name:marty", "name:marty"},
			expectEmpty: "conjunction",
		},
		{
			fieldTerms:  []string{"name:NotARealName", "name:marty", "desc:beer", "title:mister", "street:couchbase"},
			expectEmpty: "conjunction",
		},
	}

	// The theme of this unit test is that given one of the above
	// search test cases -- no matter what searcher options we
	// provide, across either conjunctions or disjunctions, whether we
	// have optimizations that are enabled or disabled, the set of doc
	// ID's from the search results from any of those combinations
	// should be the same.
	searcherOptionsToCompare := []search.SearcherOptions{
		{},
		{Explain: true},
		{IncludeTermVectors: true},
		{IncludeTermVectors: true, Explain: true},
		{Score: "none"},
		{Score: "none", IncludeTermVectors: true},
		{Score: "none", IncludeTermVectors: true, Explain: true},
		{Score: "none", Explain: true},
	}

	testScorchCompositeSearchOptimizations(t, twoDocIndexReader, tests,
		searcherOptionsToCompare, "conjunction")

	testScorchCompositeSearchOptimizations(t, twoDocIndexReader, tests,
		searcherOptionsToCompare, "disjunction")
}
// testScorchCompositeSearchOptimizations runs every test case for the given
// composite kind twice, first with optimizations disabled and then enabled,
// sharing one result set per case so the second run is compared against the
// first.
func testScorchCompositeSearchOptimizations(t *testing.T, indexReader index.IndexReader,
	tests []compositeSearchOptimizationTest,
	searcherOptionsToCompare []search.SearcherOptions,
	compositeKind string,
) {
	for caseIdx := 0; caseIdx < len(tests); caseIdx++ {
		// Fresh baseline per case; the unoptimized run fills it in.
		baseline := make(map[string]bool)
		for _, allowOptimizations := range []bool{false, true} {
			testScorchCompositeSearchOptimizationsHelper(t, indexReader, tests, caseIdx,
				searcherOptionsToCompare, compositeKind, allowOptimizations, baseline)
		}
	}
}
// testScorchCompositeSearchOptimizationsHelper runs test case testi once per
// entry of searcherOptionsToCompare, with the scorch optimizations forced on
// or off according to allowOptimizations.
//
// The very first run (first searcher options, optimizations disabled) records
// the external doc IDs it sees into resultsToCompare; every other run must
// return only IDs from that set and the same number of hits. The global
// scorch optimization switches are restored before returning.
func testScorchCompositeSearchOptimizationsHelper(
	t *testing.T, indexReader index.IndexReader,
	tests []compositeSearchOptimizationTest, testi int,
	searcherOptionsToCompare []search.SearcherOptions,
	compositeKind string, allowOptimizations bool, resultsToCompare map[string]bool,
) {
	// Save the global allowed optimization settings to restore later.
	optimizeConjunction := scorch.OptimizeConjunction
	optimizeConjunctionUnadorned := scorch.OptimizeConjunctionUnadorned
	optimizeDisjunctionUnadorned := scorch.OptimizeDisjunctionUnadorned
	optimizeDisjunctionUnadornedMinChildCardinality := scorch.OptimizeDisjunctionUnadornedMinChildCardinality

	scorch.OptimizeConjunction = allowOptimizations
	scorch.OptimizeConjunctionUnadorned = allowOptimizations
	scorch.OptimizeDisjunctionUnadorned = allowOptimizations

	if allowOptimizations {
		scorch.OptimizeDisjunctionUnadornedMinChildCardinality = uint64(0)
	}

	defer func() {
		scorch.OptimizeConjunction = optimizeConjunction
		scorch.OptimizeConjunctionUnadorned = optimizeConjunctionUnadorned
		scorch.OptimizeDisjunctionUnadorned = optimizeDisjunctionUnadorned
		scorch.OptimizeDisjunctionUnadornedMinChildCardinality = optimizeDisjunctionUnadornedMinChildCardinality
	}()

	test := tests[testi]

	for searcherOptionsI, searcherOptions := range searcherOptionsToCompare {
		// Construct the leaf term searchers.
		var searchers []search.Searcher

		for _, fieldTerm := range test.fieldTerms {
			ft := strings.Split(fieldTerm, ":")
			if len(ft) < 2 {
				// Report a malformed case instead of panicking on ft[1].
				t.Fatalf("malformed field term %q, expected \"field:term\"", fieldTerm)
			}
			field := ft[0]
			term := ft[1]

			searcher, err := NewTermSearcher(context.TODO(), indexReader, term, field, 1.0, searcherOptions)
			if err != nil {
				t.Fatal(err)
			}

			searchers = append(searchers, searcher)
		}

		// Construct the composite searcher.
		var cs search.Searcher
		var err error
		if compositeKind == "conjunction" {
			cs, err = NewConjunctionSearcher(context.TODO(), indexReader, searchers, searcherOptions)
		} else {
			cs, err = NewDisjunctionSearcher(context.TODO(), indexReader, searchers, 0, searcherOptions)
		}
		if err != nil {
			t.Fatal(err)
		}

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(10, 0),
		}

		next, err := cs.Next(ctx)
		if err != nil {
			// Previously an error on the first Next skipped the loop and was
			// never reported.
			t.Fatalf("error iterating searcher: %v", err)
		}
		i := 0
		for next != nil {
			docID, err := indexReader.ExternalID(next.IndexInternalID)
			if err != nil {
				t.Fatal(err)
			}

			if searcherOptionsI == 0 && !allowOptimizations {
				// Baseline run: record what an unoptimized search returns.
				resultsToCompare[string(docID)] = true
			} else if !resultsToCompare[string(docID)] {
				t.Errorf("missing %s", string(docID))
			}

			next, err = cs.Next(ctx)
			if err != nil {
				t.Fatalf("error iterating searcher: %v", err)
			}
			i++
		}

		if i != len(resultsToCompare) {
			t.Errorf("mismatched count, %d vs %d", i, len(resultsToCompare))
		}

		if i == 0 && !strings.Contains(test.expectEmpty, compositeKind) {
			t.Errorf("testi: %d, compositeKind: %s, allowOptimizations: %t,"+
				" searcherOptionsI: %d, searcherOptions: %#v,"+
				" expected some results but got no results on test: %#v",
				testi, compositeKind, allowOptimizations,
				searcherOptionsI, searcherOptions, test)
		}

		// Release the searcher built for this iteration; it was previously
		// left open.
		if err := cs.Close(); err != nil {
			t.Errorf("error closing searcher: %v", err)
		}
	}
}
================================================
FILE: search/searcher/search_disjunction.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// DisjunctionMaxClauseCount is a package-level setting that applications can
// adjust to a non-zero value to cause the disjunction searchers to return an
// error instead of executing searches when the number of child searchers
// exceeds this value. Zero disables the check.
var DisjunctionMaxClauseCount = 0
// DisjunctionHeapTakeover is a package-level setting that applications can
// adjust to control when the DisjunctionSearcher will switch from a simple
// slice implementation to a heap implementation: the heap is used when the
// number of child searchers is greater than this value.
var DisjunctionHeapTakeover = 10
// NewDisjunctionSearcher builds a disjunction searcher over qsearchers with
// the DisjunctionMaxClauseCount check enabled. min is the number of child
// searchers that must match a document for it to be returned.
func NewDisjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions) (
	search.Searcher, error) {
	const enforceClauseLimit = true
	return newDisjunctionSearcher(ctx, indexReader, qsearchers, min, options,
		enforceClauseLimit)
}
// optionsDisjunctionOptimizable reports whether the options allow the
// "unadorned" disjunction optimization: scoring must be disabled and term
// vectors must not be requested.
func optionsDisjunctionOptimizable(options search.SearcherOptions) bool {
	if options.IncludeTermVectors {
		return false
	}
	return options.Score == "none"
}
// newDisjunctionSearcher picks the disjunction implementation for qsearchers.
//
// When the context carries search.IncludeScoreBreakdownKey (disjunction over
// KNN searchers) the KNN optimization is applied first. Otherwise the
// "unadorned" optimization is attempted and, if it yields a searcher, that
// searcher is returned directly. Failing that, the heap implementation is
// used when there are more than DisjunctionHeapTakeover children and the
// slice implementation otherwise. limit controls whether
// DisjunctionMaxClauseCount is enforced by the chosen implementation.
//
// On error the returned searcher is always a literal nil interface.
func newDisjunctionSearcher(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (search.Searcher, error) {

	var disjOverKNN bool
	if ctx != nil {
		disjOverKNN, _ = ctx.Value(search.IncludeScoreBreakdownKey).(bool)
	}
	if disjOverKNN {
		// The KNN Searcher optimization is a necessary pre-req for the KNN Searchers,
		// not an optional optimization like for, say term searchers.
		// It's an optimization to repeat search an open vector index when applicable,
		// rather than individually opening and searching a vector index.
		err := optimizeKNN(ctx, indexReader, qsearchers)
		if err != nil {
			return nil, err
		}
	} else {
		// attempt the "unadorned" disjunction optimization only when we
		// do not need extra information like freq-norm's or term vectors
		// and the requested min is simple
		if len(qsearchers) > 1 && min <= 1 &&
			optionsDisjunctionOptimizable(options) {
			rv, err := optimizeCompositeSearcher(ctx, "disjunction:unadorned",
				indexReader, qsearchers, options)
			if err != nil || rv != nil {
				return rv, err
			}
		}
	}

	// The constructors below return concrete pointer types. Returning their
	// results directly would wrap a nil pointer in a non-nil search.Searcher
	// on the error path, so the error case is unpacked explicitly.
	if len(qsearchers) > DisjunctionHeapTakeover {
		heapSearcher, err := newDisjunctionHeapSearcher(ctx, indexReader, qsearchers, min, options,
			limit)
		if err != nil {
			return nil, err
		}
		return heapSearcher, nil
	}

	sliceSearcher, err := newDisjunctionSliceSearcher(ctx, indexReader, qsearchers, min, options,
		limit)
	if err != nil {
		return nil, err
	}
	return sliceSearcher, nil
}
// optimizeCompositeSearcher tries to collapse qsearchers into a single term
// searcher using the index's optimization of the given kind.
//
// Every child must implement index.Optimizable and contribute to a shared
// OptimizableContext. A (nil, nil) return means the optimization does not
// apply and the caller should fall back to its regular implementation; this
// includes an empty qsearchers slice. A non-nil error means the optimization
// was attempted and failed.
func optimizeCompositeSearcher(ctx context.Context, optimizationKind string,
	indexReader index.IndexReader, qsearchers []search.Searcher,
	options search.SearcherOptions) (search.Searcher, error) {
	var octx index.OptimizableContext

	for _, searcher := range qsearchers {
		o, ok := searcher.(index.Optimizable)
		if !ok {
			return nil, nil
		}

		var err error
		octx, err = o.Optimize(optimizationKind, octx)
		if err != nil {
			return nil, err
		}

		if octx == nil {
			return nil, nil
		}
	}

	// With no children the loop never runs and octx is still nil; calling
	// Finish on it would panic.
	if octx == nil {
		return nil, nil
	}

	optimized, err := octx.Finish()
	if err != nil || optimized == nil {
		return nil, err
	}

	tfr, ok := optimized.(index.TermFieldReader)
	if !ok {
		return nil, nil
	}

	// Unpack the result so that a failure never surfaces as a non-nil
	// interface wrapping a nil concrete searcher.
	termSearcher, err := newTermSearcherFromReader(ctx, indexReader, tfr,
		[]byte(optimizationKind), "*", 1.0, options)
	if err != nil {
		return nil, err
	}
	return termSearcher, nil
}
// tooManyClauses reports whether count exceeds DisjunctionMaxClauseCount.
// A limit of zero disables the check.
func tooManyClauses(count int) bool {
	limit := DisjunctionMaxClauseCount
	return limit != 0 && count > limit
}
// tooManyClausesErr builds the error returned when a searcher over field
// would have count clauses, more than DisjunctionMaxClauseCount allows.
func tooManyClausesErr(field string, count int) error {
	const format = "TooManyClauses over field: `%s` [%d > maxClauseCount, which is set to %d]"
	return fmt.Errorf(format, field, count, DisjunctionMaxClauseCount)
}
================================================
FILE: search/searcher/search_disjunction_heap.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"container/heap"
"context"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// Static (shallow) struct sizes in bytes, computed once at package
// initialization and used by Size.
var (
	reflectStaticSizeDisjunctionHeapSearcher int
	reflectStaticSizeSearcherCurr            int
)

func init() {
	reflectStaticSizeDisjunctionHeapSearcher = int(reflect.TypeOf(DisjunctionHeapSearcher{}).Size())
	reflectStaticSizeSearcherCurr = int(reflect.TypeOf(SearcherCurr{}).Size())
}
// SearcherCurr pairs a child searcher with its current, not yet consumed
// match; it is the element type of the DisjunctionHeapSearcher heap.
type SearcherCurr struct {
// searcher is the child searcher this entry tracks.
searcher search.Searcher
// curr is the most recent match obtained from searcher.
curr *search.DocumentMatch
// matchingIdx is the position of searcher in the parent's searchers slice.
matchingIdx int
}
// DisjunctionHeapSearcher is a disjunction searcher that keeps its child
// searchers in a heap ordered by the internal ID of their current match, so
// the child with the smallest current document is always on top.
type DisjunctionHeapSearcher struct {
indexReader index.IndexReader
// numSearchers is len(searchers), captured at construction.
numSearchers int
scorer *scorer.DisjunctionQueryScorer
// min is the number of children that must match a document for Next to
// return it.
min int
queryNorm float64
// retrieveScoreBreakdown makes Next use ScoreAndExplBreakdown instead of
// Score.
retrieveScoreBreakdown bool
// initialized is set once every child has been asked for its first match.
initialized bool
searchers []search.Searcher
// heap holds one entry per child that currently has a match.
heap []*SearcherCurr
// matching, matchingIdxs and matchingCurrs describe the children positioned
// on the current candidate document; updateMatches rebuilds them.
matching []*search.DocumentMatch
matchingIdxs []int
matchingCurrs []*SearcherCurr
bytesRead uint64
}
// newDisjunctionHeapSearcher constructs the heap-based disjunction searcher.
// When limit is set and the number of searchers exceeds
// DisjunctionMaxClauseCount, it returns an error instead. Score breakdowns
// are enabled when ctx carries search.IncludeScoreBreakdownKey.
func newDisjunctionHeapSearcher(ctx context.Context, indexReader index.IndexReader,
	searchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (
	*DisjunctionHeapSearcher, error) {
	n := len(searchers)
	if limit && tooManyClauses(n) {
		return nil, tooManyClausesErr("", n)
	}

	withBreakdown := false
	if ctx != nil {
		withBreakdown, _ = ctx.Value(search.IncludeScoreBreakdownKey).(bool)
	}

	hs := &DisjunctionHeapSearcher{
		indexReader:            indexReader,
		searchers:              searchers,
		numSearchers:           n,
		scorer:                 scorer.NewDisjunctionQueryScorer(options),
		min:                    int(min),
		matching:               make([]*search.DocumentMatch, n),
		matchingCurrs:          make([]*SearcherCurr, n),
		matchingIdxs:           make([]int, n),
		retrieveScoreBreakdown: withBreakdown,
		heap:                   make([]*SearcherCurr, 0, n),
	}
	// Propagate the query norm to the children before first use.
	hs.computeQueryNorm()
	return hs, nil
}
// computeQueryNorm derives the query norm as 1/sqrt of the summed child
// weights, stores it, and pushes it down to every child searcher.
// NOTE(review): each child's Weight() is presumably already a sum of squared
// weights — confirm against the child implementations.
func (s *DisjunctionHeapSearcher) computeQueryNorm() {
	var total float64
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// Size estimates the memory used by the searcher in bytes: its own static
// size, the scorer, every child searcher, the current matches and the
// bookkeeping slices.
func (s *DisjunctionHeapSearcher) Size() int {
	total := reflectStaticSizeDisjunctionHeapSearcher + size.SizeOfPtr + s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, dm := range s.matching {
		if dm == nil {
			continue
		}
		total += dm.Size()
	}

	// matchingCurrs and heap entries only cost their static size here; the
	// searchers and document matches they point at are counted above.
	total += (len(s.matchingCurrs) + len(s.heap)) * reflectStaticSizeSearcherCurr
	total += len(s.matchingIdxs) * size.SizeOfInt

	return total
}
// initSearchers asks every child for its first match, pushes the children
// that have one onto the heap, and computes the first set of matching
// children. It marks the searcher initialized only on success.
func (s *DisjunctionHeapSearcher) initSearchers(ctx *search.SearchContext) error {
	// One allocation backs all heap entries.
	entries := make([]SearcherCurr, len(s.searchers))

	for idx := range s.searchers {
		child := s.searchers[idx]
		first, err := child.Next(ctx)
		if err != nil {
			return err
		}
		if first == nil {
			// Child has no matches at all; it never enters the heap.
			continue
		}
		entry := &entries[idx]
		entry.searcher = child
		entry.curr = first
		entry.matchingIdx = idx
		heap.Push(s, entry)
	}

	if err := s.updateMatches(); err != nil {
		return err
	}

	s.initialized = true
	return nil
}
// updateMatches pops from the heap every entry positioned on the smallest
// current document and records them in matching, matchingCurrs and
// matchingIdxs, reusing the existing backing arrays. With an empty heap all
// three end up empty.
func (s *DisjunctionHeapSearcher) updateMatches() error {
	hits := s.matching[:0]
	hitCurrs := s.matchingCurrs[:0]
	hitIdxs := s.matchingIdxs[:0]

	var last *SearcherCurr
	for len(s.heap) > 0 {
		// After the first pop, keep going only while the top of the heap is
		// on the same document as the entry just popped.
		if last != nil && !last.curr.IndexInternalID.Equals(s.heap[0].curr.IndexInternalID) {
			break
		}
		last = heap.Pop(s).(*SearcherCurr)
		hits = append(hits, last.curr)
		hitCurrs = append(hitCurrs, last)
		hitIdxs = append(hitIdxs, last.matchingIdx)
	}

	s.matching = hits
	s.matchingCurrs = hitCurrs
	s.matchingIdxs = hitIdxs

	return nil
}
// Weight returns the sum of the child searchers' weights.
func (s *DisjunctionHeapSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher.
func (s *DisjunctionHeapSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least s.min child searchers,
// or nil when the children are exhausted. The children are initialized
// lazily on the first call.
func (s *DisjunctionHeapSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var rv *search.DocumentMatch
found := false
// Each pass examines one candidate document (the current s.matching) and
// then moves the children positioned on it forward.
for !found && len(s.matching) > 0 {
if len(s.matching) >= s.min {
found = true
if s.retrieveScoreBreakdown {
// just return score and expl breakdown here, since it is a disjunction over knn searchers,
// and the final score and expl is calculated in the knn collector
rv = s.scorer.ScoreAndExplBreakdown(ctx, s.matching, s.matchingIdxs, nil, s.numSearchers)
} else {
// score this match
rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
}
}
// invoke next on all the matching searchers
for _, matchingCurr := range s.matchingCurrs {
// Return the child's match to the pool unless it is the very object being
// returned to the caller.
if matchingCurr.curr != rv {
ctx.DocumentMatchPool.Put(matchingCurr.curr)
}
curr, err := matchingCurr.searcher.Next(ctx)
if err != nil {
return nil, err
}
// Exhausted children (curr == nil) are simply not pushed back.
if curr != nil {
matchingCurr.curr = curr
heap.Push(s, matchingCurr)
}
}
// Collect the children positioned on the next candidate document.
err := s.updateMatches()
if err != nil {
return nil, err
}
}
return rv, nil
}
// Advance moves every child whose current match is before ID forward to ID
// (or beyond) and then returns the result of Next. The children are
// initialized lazily on the first call.
func (s *DisjunctionHeapSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// if there is anything in matching, toss it back onto the heap
for _, matchingCurr := range s.matchingCurrs {
heap.Push(s, matchingCurr)
}
s.matching = s.matching[:0]
s.matchingCurrs = s.matchingCurrs[:0]
// find all searchers that actually need to be advanced
// advance them, using s.matchingCurrs as temp storage
for len(s.heap) > 0 && s.heap[0].curr.IndexInternalID.Compare(ID) < 0 {
searcherCurr := heap.Pop(s).(*SearcherCurr)
// The stale match is handed back to the pool before advancing.
ctx.DocumentMatchPool.Put(searcherCurr.curr)
curr, err := searcherCurr.searcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
// Children with nothing at or after ID drop out of the heap.
if curr != nil {
searcherCurr.curr = curr
s.matchingCurrs = append(s.matchingCurrs, searcherCurr)
}
}
// now all of the searchers that we advanced have to be pushed back
for _, matchingCurr := range s.matchingCurrs {
heap.Push(s, matchingCurr)
}
// reset our temp space
s.matchingCurrs = s.matchingCurrs[:0]
err := s.updateMatches()
if err != nil {
return nil, err
}
return s.Next(ctx)
}
// Count returns an upper bound on the number of matches: the sum of the
// child searchers' counts.
func (s *DisjunctionHeapSearcher) Count() uint64 {
	var worstCase uint64
	for i := range s.searchers {
		worstCase += s.searchers[i].Count()
	}
	return worstCase
}
// Close closes every child searcher and returns the first error
// encountered, if any. All children are closed even after a failure.
func (s *DisjunctionHeapSearcher) Close() (rv error) {
	for i := range s.searchers {
		if err := s.searchers[i].Close(); err != nil && rv == nil {
			rv = err
		}
	}
	return rv
}
// Min returns the number of child searchers that must match a document for
// it to be returned.
func (s *DisjunctionHeapSearcher) Min() int {
return s.min
}
// DocumentMatchPoolSize returns the number of child searchers plus the sum
// of the pool sizes the children report for themselves.
func (s *DisjunctionHeapSearcher) DocumentMatchPoolSize() int {
	total := len(s.searchers)
	for _, child := range s.searchers {
		total += child.DocumentMatchPoolSize()
	}
	return total
}
// Optimize implements index.Optimizable. It only does anything in the edge
// case where the disjunction wraps exactly one child that is itself
// Optimizable, in which case the call is delegated to that child; otherwise
// it returns (nil, nil).
func (s *DisjunctionHeapSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error) {
	if len(s.searchers) != 1 {
		return nil, nil
	}

	if child, ok := s.searchers[0].(index.Optimizable); ok {
		return child.Optimize(kind, octx)
	}

	return nil, nil
}
// heap impl

// Len returns the number of entries currently on the heap.
func (s *DisjunctionHeapSearcher) Len() int { return len(s.heap) }
// Less orders heap entries by the internal ID of their current match. An
// entry without a current match sorts before any other entry.
func (s *DisjunctionHeapSearcher) Less(i, j int) bool {
	left := s.heap[i].curr
	if left == nil {
		return true
	}
	right := s.heap[j].curr
	if right == nil {
		return false
	}
	return left.IndexInternalID.Compare(right.IndexInternalID) < 0
}
// Swap exchanges the heap entries at positions i and j.
func (s *DisjunctionHeapSearcher) Swap(i, j int) {
s.heap[i], s.heap[j] = s.heap[j], s.heap[i]
}
// Push appends x, which must be a *SearcherCurr, to the heap's backing
// slice. It is meant to be called through container/heap.
func (s *DisjunctionHeapSearcher) Push(x interface{}) {
	entry := x.(*SearcherCurr)
	s.heap = append(s.heap, entry)
}
// Pop removes and returns the last element of the heap's backing slice. It
// is meant to be called through container/heap.
func (s *DisjunctionHeapSearcher) Pop() interface{} {
	last := len(s.heap) - 1
	entry := s.heap[last]
	s.heap = s.heap[:last]
	return entry
}
================================================
FILE: search/searcher/search_disjunction_slice.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"math"
"reflect"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeDisjunctionSliceSearcher is the static (shallow) size of
// a DisjunctionSliceSearcher in bytes, computed once at package
// initialization and used by Size.
var reflectStaticSizeDisjunctionSliceSearcher int

func init() {
	reflectStaticSizeDisjunctionSliceSearcher = int(reflect.TypeOf(DisjunctionSliceSearcher{}).Size())
}
// DisjunctionSliceSearcher is a disjunction searcher that tracks the current
// match of each child in a plain slice and scans it to find the smallest
// current document.
type DisjunctionSliceSearcher struct {
indexReader index.IndexReader
// searchers holds the children in sorted order (see the constructor).
searchers []search.Searcher
// originalPos maps a sorted position back to the child's position in the
// slice passed to the constructor; it is only populated when
// retrieveScoreBreakdown is set.
originalPos []int
// numSearchers is len(searchers), captured at construction.
numSearchers int
queryNorm float64
// retrieveScoreBreakdown makes Next use ScoreAndExplBreakdown instead of
// Score.
retrieveScoreBreakdown bool
// currs[i] is the current match of searchers[i], or nil once exhausted.
currs []*search.DocumentMatch
scorer *scorer.DisjunctionQueryScorer
// min is the number of children that must match a document for Next to
// return it.
min int
// matching and matchingIdxs describe the children positioned on the
// current candidate document; updateMatches rebuilds them.
matching []*search.DocumentMatch
matchingIdxs []int
// initialized is set once every child has been asked for its first match.
initialized bool
bytesRead uint64
}
// newDisjunctionSliceSearcher constructs the slice-based disjunction
// searcher over a sorted copy of qsearchers. When limit is set and the
// number of searchers exceeds DisjunctionMaxClauseCount, it returns an error
// instead. When ctx carries search.IncludeScoreBreakdownKey the original
// position of each child is tracked through the sort.
func newDisjunctionSliceSearcher(ctx context.Context, indexReader index.IndexReader,
	qsearchers []search.Searcher, min float64, options search.SearcherOptions,
	limit bool) (
	*DisjunctionSliceSearcher, error,
) {
	n := len(qsearchers)
	if limit && tooManyClauses(n) {
		return nil, tooManyClausesErr("", n)
	}

	withBreakdown := false
	if ctx != nil {
		withBreakdown, _ = ctx.Value(search.IncludeScoreBreakdownKey).(bool)
	}

	var (
		ordered   OrderedSearcherList
		positions []int
	)
	if withBreakdown {
		// needed only when kNN is in picture
		tracked := &OrderedPositionalSearcherList{
			searchers: make([]search.Searcher, n),
			index:     make([]int, n),
		}
		copy(tracked.searchers, qsearchers)
		for i := range tracked.index {
			tracked.index[i] = i
		}
		sort.Sort(tracked)
		ordered = tracked.searchers
		positions = tracked.index
	} else {
		ordered = make(OrderedSearcherList, n)
		copy(ordered, qsearchers)
		sort.Sort(ordered)
	}

	ss := &DisjunctionSliceSearcher{
		indexReader:            indexReader,
		searchers:              ordered,
		originalPos:            positions,
		numSearchers:           len(ordered),
		currs:                  make([]*search.DocumentMatch, len(ordered)),
		scorer:                 scorer.NewDisjunctionQueryScorer(options),
		min:                    int(min),
		retrieveScoreBreakdown: withBreakdown,
		matching:               make([]*search.DocumentMatch, len(ordered)),
		matchingIdxs:           make([]int, len(ordered)),
	}
	// Propagate the query norm to the children before first use.
	ss.computeQueryNorm()
	return ss, nil
}
// computeQueryNorm derives the query norm as 1/sqrt of the summed child
// weights, stores it, and pushes it down to every child searcher.
// NOTE(review): each child's Weight() is presumably already a sum of squared
// weights — confirm against the child implementations.
func (s *DisjunctionSliceSearcher) computeQueryNorm() {
	var total float64
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}

	s.queryNorm = 1.0 / math.Sqrt(total)

	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(s.queryNorm)
	}
}
// Size estimates the memory used by the searcher in bytes: its own static
// size, the scorer, every child searcher, the current and matching document
// matches, and the index bookkeeping slices.
func (s *DisjunctionSliceSearcher) Size() int {
	total := reflectStaticSizeDisjunctionSliceSearcher + size.SizeOfPtr + s.scorer.Size()

	for _, child := range s.searchers {
		total += child.Size()
	}

	for _, dm := range s.currs {
		if dm == nil {
			continue
		}
		total += dm.Size()
	}

	for _, dm := range s.matching {
		if dm == nil {
			continue
		}
		total += dm.Size()
	}

	total += len(s.matchingIdxs) * size.SizeOfInt
	total += len(s.originalPos) * size.SizeOfInt

	return total
}
// initSearchers asks every child for its first match, returning any
// previously held match to the pool, and then computes the first set of
// matching children. It marks the searcher initialized only on success.
func (s *DisjunctionSliceSearcher) initSearchers(ctx *search.SearchContext) error {
	for idx := range s.searchers {
		if prev := s.currs[idx]; prev != nil {
			ctx.DocumentMatchPool.Put(prev)
		}
		first, err := s.searchers[idx].Next(ctx)
		s.currs[idx] = first
		if err != nil {
			return err
		}
	}

	if err := s.updateMatches(); err != nil {
		return err
	}

	s.initialized = true
	return nil
}
// updateMatches scans the children's current matches and records in
// matching/matchingIdxs every child positioned on the smallest current
// document, reusing the existing backing arrays.
func (s *DisjunctionSliceSearcher) updateMatches() error {
	hits := s.matching[:0]
	hitIdxs := s.matchingIdxs[:0]

	for idx, cand := range s.currs {
		if cand == nil {
			// Exhausted child.
			continue
		}

		if len(hits) > 0 {
			order := cand.IndexInternalID.Compare(hits[0].IndexInternalID)
			switch {
			case order > 0:
				// Beyond the current smallest document; not part of this hit.
				continue
			case order < 0:
				// Found a smaller document; everything collected so far is stale.
				hits = hits[:0]
				hitIdxs = hitIdxs[:0]
			}
		}

		hits = append(hits, cand)
		hitIdxs = append(hitIdxs, idx)
	}

	s.matching = hits
	s.matchingIdxs = hitIdxs

	return nil
}
// Weight returns the sum of the child searchers' weights.
func (s *DisjunctionSliceSearcher) Weight() float64 {
	total := 0.0
	for i := range s.searchers {
		total += s.searchers[i].Weight()
	}
	return total
}
// SetQueryNorm forwards qnorm to every child searcher.
func (s *DisjunctionSliceSearcher) SetQueryNorm(qnorm float64) {
	for i := range s.searchers {
		s.searchers[i].SetQueryNorm(qnorm)
	}
}
// Next returns the next document matched by at least s.min child searchers,
// or nil when the children are exhausted. The children are initialized
// lazily on the first call.
func (s *DisjunctionSliceSearcher) Next(ctx *search.SearchContext) (
*search.DocumentMatch, error,
) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
var err error
var rv *search.DocumentMatch
found := false
// Each pass examines one candidate document (the current s.matching) and
// then moves the children positioned on it forward.
for !found && len(s.matching) > 0 {
if len(s.matching) >= s.min {
found = true
if s.retrieveScoreBreakdown {
// just return score and expl breakdown here, since it is a disjunction over knn searchers,
// and the final score and expl is calculated in the knn collector
rv = s.scorer.ScoreAndExplBreakdown(ctx, s.matching, s.matchingIdxs, s.originalPos, s.numSearchers)
} else {
// score this match
rv = s.scorer.Score(ctx, s.matching, len(s.matching), s.numSearchers)
}
}
// invoke next on all the matching searchers
for _, i := range s.matchingIdxs {
searcher := s.searchers[i]
// Return the child's match to the pool unless it is the very object being
// returned to the caller.
if s.currs[i] != rv {
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Next(ctx)
if err != nil {
return nil, err
}
}
// Collect the children positioned on the next candidate document.
err = s.updateMatches()
if err != nil {
return nil, err
}
}
return rv, nil
}
// Advance moves every child whose current match is before ID (or that has no
// current match) forward to ID and then returns the result of Next. The
// children are initialized lazily on the first call.
func (s *DisjunctionSliceSearcher) Advance(ctx *search.SearchContext,
ID index.IndexInternalID,
) (*search.DocumentMatch, error) {
if !s.initialized {
err := s.initSearchers(ctx)
if err != nil {
return nil, err
}
}
// advance every searcher that is not already at or beyond ID
var err error
for i, searcher := range s.searchers {
if s.currs[i] != nil {
// Already at or past the target: leave this child where it is.
if s.currs[i].IndexInternalID.Compare(ID) >= 0 {
continue
}
// The stale match is handed back to the pool before advancing.
ctx.DocumentMatchPool.Put(s.currs[i])
}
s.currs[i], err = searcher.Advance(ctx, ID)
if err != nil {
return nil, err
}
}
err = s.updateMatches()
if err != nil {
return nil, err
}
return s.Next(ctx)
}
// Count returns an upper bound on the number of matches: the sum of the
// child searchers' counts.
func (s *DisjunctionSliceSearcher) Count() uint64 {
	var worstCase uint64
	for i := range s.searchers {
		worstCase += s.searchers[i].Count()
	}
	return worstCase
}
// Close closes every child searcher and returns the first error
// encountered, if any. All children are closed even after a failure.
func (s *DisjunctionSliceSearcher) Close() (rv error) {
	for i := range s.searchers {
		if err := s.searchers[i].Close(); err != nil && rv == nil {
			rv = err
		}
	}
	return rv
}
// Min returns the number of child searchers that must match a document for
// it to be returned.
func (s *DisjunctionSliceSearcher) Min() int {
return s.min
}
// DocumentMatchPoolSize returns one slot per child's current match plus the
// sum of the pool sizes the children report for themselves.
func (s *DisjunctionSliceSearcher) DocumentMatchPoolSize() int {
	total := len(s.currs)
	for _, child := range s.searchers {
		total += child.DocumentMatchPoolSize()
	}
	return total
}
// Optimize implements index.Optimizable. It only does anything in the edge
// case where the disjunction wraps exactly one child that is itself
// Optimizable, in which case the call is delegated to that child; otherwise
// it returns (nil, nil).
func (s *DisjunctionSliceSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error,
) {
	if len(s.searchers) != 1 {
		return nil, nil
	}

	if child, ok := s.searchers[0].(index.Optimizable); ok {
		return child.Optimize(kind, octx)
	}

	return nil, nil
}
================================================
FILE: search/searcher/search_disjunction_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"os"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestDisjunctionSearch checks the document IDs and scores produced by a
// simple two-term disjunction and by a disjunction nested inside another
// disjunction.
func TestDisjunctionSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// Fatal rather than Error: everything below, including the deferred
		// Close, needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	explainTrue := search.SearcherOptions{Explain: true}

	martyTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	martyTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher2, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher2, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher2, dustinTermSearcher2}, 0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	raviTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "ravi", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	nestedRaviOrMartyOrDustinSearcher, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{raviTermSearcher, martyOrDustinSearcher2}, 0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: martyOrDustinSearcher,
			results: []*search.DocumentMatch{
				{
					IndexInternalID: index.IndexInternalID("1"),
					Score:           0.6775110856165737,
				},
				{
					IndexInternalID: index.IndexInternalID("3"),
					Score:           0.6775110856165737,
				},
			},
		},
		// test a nested disjunction
		{
			searcher: nestedRaviOrMartyOrDustinSearcher,
			results: []*search.DocumentMatch{
				{
					IndexInternalID: index.IndexInternalID("1"),
					Score:           0.2765927424732821,
				},
				{
					IndexInternalID: index.IndexInternalID("3"),
					Score:           0.2765927424732821,
				},
				{
					IndexInternalID: index.IndexInternalID("4"),
					Score:           0.5531854849465642,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Pass the searcher as an argument so each deferred call closes its
		// own searcher even when the loop variable is shared across
		// iterations (Go versions before 1.22).
		defer func(s search.Searcher) {
			err := s.Close()
			if err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := test.searcher.Next(ctx)
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			// The pool is sized exactly, so each hit must be returned before
			// asking for the next one.
			ctx.DocumentMatchPool.Put(next)
			next, err = test.searcher.Next(ctx)
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
// TestDisjunctionAdvance checks that advancing a marty-or-dustin disjunction
// to document "3" yields that document.
func TestDisjunctionAdvance(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	explainTrue := search.SearcherOptions{Explain: true}

	martyTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	martyOrDustinSearcher, err := NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher}, 0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	// release the searcher before the reader is closed (defers run in LIFO order)
	defer func() {
		if err := martyOrDustinSearcher.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinSearcher.DocumentMatchPoolSize(), 0),
	}
	target := index.IndexInternalID("3")
	match, err := martyOrDustinSearcher.Advance(ctx, target)
	if err != nil {
		t.Errorf("unexpected error: %v", err)
	}
	if match == nil {
		t.Errorf("expected 3, got nil")
	} else if !match.IndexInternalID.Equals(target) {
		t.Errorf("expected 3, got %s", match.IndexInternalID)
	}
}
// TestDisjunctionSearchTooMany lowers DisjunctionMaxClauseCount to 2 and
// verifies that building a disjunction over three term searchers is rejected.
func TestDisjunctionSearchTooMany(t *testing.T) {
	// set the max to a low non-zero value
	DisjunctionMaxClauseCount = 2
	defer func() {
		// reset it after the test
		DisjunctionMaxClauseCount = 0
	}()

	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	explainTrue := search.SearcherOptions{Explain: true}

	martyTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "marty", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	dustinTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "dustin", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}
	steveTermSearcher, err := NewTermSearcher(context.TODO(), twoDocIndexReader, "steve", "name", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	_, err = NewDisjunctionSearcher(context.TODO(), twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher, steveTermSearcher}, 0, explainTrue)
	if err == nil {
		// err is nil on this path, so printing it would give an empty message
		t.Fatal("expected an error when the clause count exceeds DisjunctionMaxClauseCount, got nil")
	}
}
// TestUnadornedDisjunctionAdvance builds a three-term disjunction with
// scoring, explanations and term vectors disabled over a scorch index, and
// checks that Next and Advance can be mixed freely, including Advance calls
// that target a document the searcher has already moved past.
func TestUnadornedDisjunctionAdvance(t *testing.T) {
	dir, err := os.MkdirTemp("", "scorchTwoDoc")
	if err != nil {
		// without a directory the index would be created at an empty path
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(dir) // best-effort cleanup of the temporary index
	}()

	twoDocIndex := initTwoDocScorch(dir)
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	// closeSearcher releases a searcher created by this test; it is deferred
	// right after each creation so it runs before the reader is closed.
	closeSearcher := func(s search.Searcher) {
		if err := s.Close(); err != nil {
			t.Error(err)
		}
	}

	getNewOptimizedCompositeSearcher := func(t *testing.T) search.Searcher {
		optimizedCompositeSearcherOptions := search.SearcherOptions{Explain: false, IncludeTermVectors: false, Score: "none"}
		martyTermSearcher, err := NewTermSearcher(context.Background(), twoDocIndexReader, "marty", "name", 1.0, optimizedCompositeSearcherOptions)
		if err != nil {
			t.Fatal(err)
		}
		dustinTermSearcher, err := NewTermSearcher(context.Background(), twoDocIndexReader, "dustin", "name", 1.0, optimizedCompositeSearcherOptions)
		if err != nil {
			t.Fatal(err)
		}
		steveTermSearcher, err := NewTermSearcher(context.Background(), twoDocIndexReader, "steve", "name", 1.0, optimizedCompositeSearcherOptions)
		if err != nil {
			t.Fatal(err)
		}
		martyOrDustinOrSteveSearcher, err := NewDisjunctionSearcher(context.Background(), twoDocIndexReader, []search.Searcher{martyTermSearcher, dustinTermSearcher, steveTermSearcher}, 0, optimizedCompositeSearcherOptions)
		if err != nil {
			t.Fatal(err)
		}
		return martyOrDustinOrSteveSearcher
	}

	martyOrDustinOrSteveSearcher := getNewOptimizedCompositeSearcher(t)
	defer closeSearcher(martyOrDustinOrSteveSearcher)

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(martyOrDustinOrSteveSearcher.DocumentMatchPoolSize(), 0),
	}

	// get the correct order using only next calls
	dm, err := martyOrDustinOrSteveSearcher.Next(ctx)
	if err != nil {
		t.Fatal(err)
	}
	expectedDocIDs := []index.IndexInternalID{}
	for dm != nil && err == nil {
		expectedDocIDs = append(expectedDocIDs, dm.IndexInternalID)
		dm, err = martyOrDustinOrSteveSearcher.Next(ctx)
	}
	if err != nil {
		t.Fatal(err)
	}
	if len(expectedDocIDs) != 3 {
		t.Fatalf("expected 3 results, got %d", len(expectedDocIDs))
	}

	// Test 1 - Advance in reverse direction after getting the correct order using only next calls
	// Next(->) - Next(->) - Next(->) - Advance(<-) - Advance(<-)
	for i := len(expectedDocIDs) - 1; i >= 0; i-- {
		xID := expectedDocIDs[i]
		dm, err = martyOrDustinOrSteveSearcher.Advance(ctx, xID)
		if err != nil {
			t.Fatal(err)
		}
		if dm == nil {
			t.Fatalf("expected to find %v", xID)
		}
		if !dm.IndexInternalID.Equals(xID) {
			t.Fatalf("expected %v, got %v", xID, dm.IndexInternalID)
		}
	}

	// Test 2 - Advance in forward direction after getting the correct order using only next calls
	// Next(->) - Next(->) - Next(->) - Advance(ResetTo0) - Advance(->) - Advance(->)
	martyOrDustinOrSteveSearcher = getNewOptimizedCompositeSearcher(t)
	defer closeSearcher(martyOrDustinOrSteveSearcher)
	for i := 0; i < len(expectedDocIDs); i++ {
		xID := expectedDocIDs[i]
		dm, err = martyOrDustinOrSteveSearcher.Advance(ctx, xID)
		if err != nil {
			t.Fatal(err)
		}
		if dm == nil {
			t.Fatalf("expected to find %v", xID)
		}
		if !dm.IndexInternalID.Equals(xID) {
			t.Fatalf("expected %v, got %v", xID, dm.IndexInternalID)
		}
	}

	// Test 3 - Alternate Next and Advance calls
	// Next(->) -> Next(->) -> Advance(<-) -> Next(->) -> Next(->) -> Advance(<-) -> Advance(<-) -> Next(->)
	martyOrDustinOrSteveSearcher = getNewOptimizedCompositeSearcher(t)
	defer closeSearcher(martyOrDustinOrSteveSearcher)

	// goNext steps forward once and requires the result to be expectedDocID.
	goNext := func(expectedDocID index.IndexInternalID) {
		dm, err = martyOrDustinOrSteveSearcher.Next(ctx)
		if err != nil {
			t.Fatal(err)
		}
		if dm == nil {
			t.Fatal("expected a document, got nil")
		}
		if !dm.IndexInternalID.Equals(expectedDocID) {
			t.Fatalf("expected %v, got %v", expectedDocID, dm.IndexInternalID)
		}
	}
	// goBack advances to an already visited document and requires an exact hit.
	goBack := func(goTo index.IndexInternalID) {
		dm, err = martyOrDustinOrSteveSearcher.Advance(ctx, goTo)
		if err != nil {
			t.Fatal(err)
		}
		if dm == nil {
			t.Fatalf("expected to find %v", goTo)
		}
		if !dm.IndexInternalID.Equals(goTo) {
			t.Fatalf("expected %v, got %v", goTo, dm.IndexInternalID)
		}
	}

	// Next (->)
	goNext(expectedDocIDs[0])
	// Next (->)
	goNext(expectedDocIDs[1])
	// Advance (<-)
	goBack(expectedDocIDs[0])
	// Next (->)
	goNext(expectedDocIDs[1])
	// Next (->)
	goNext(expectedDocIDs[2])
	// Advance (<-)
	goBack(expectedDocIDs[1])
	// Advance (<-)
	goBack(expectedDocIDs[0])
	// Next (->)
	goNext(expectedDocIDs[1])
}
================================================
FILE: search/searcher/search_docid.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeDocIDSearcher holds the size in bytes of a DocIDSearcher
// value as reported by reflect; it is computed once at package init.
var reflectStaticSizeDocIDSearcher int

func init() {
	reflectStaticSizeDocIDSearcher = int(reflect.TypeOf(DocIDSearcher{}).Size())
}
// DocIDSearcher returns documents matching a predefined set of identifiers.
type DocIDSearcher struct {
// reader iterates over the requested document identifiers.
reader index.DocIDReader
// scorer produces the DocumentMatch (with a constant score) for each hit.
scorer *scorer.ConstantScorer
// count is the number of identifiers passed to NewDocIDSearcher.
count int
}
// NewDocIDSearcher builds a DocIDSearcher over ids using indexReader.
// It returns the error from indexReader.DocIDReaderOnly if the underlying
// reader cannot be opened. Matches are scored by a constant scorer created
// from boost and options.
func NewDocIDSearcher(ctx context.Context, indexReader index.IndexReader, ids []string, boost float64,
	options search.SearcherOptions) (searcher *DocIDSearcher, err error) {
	idReader, err := indexReader.DocIDReaderOnly(ids)
	if err != nil {
		return nil, err
	}
	searcher = &DocIDSearcher{
		reader: idReader,
		scorer: scorer.NewConstantScorer(1.0, boost, options),
		count:  len(ids),
	}
	return searcher, nil
}
// Size returns the static size of the searcher plus a pointer, plus the sizes
// reported by its reader and its scorer.
func (s *DocIDSearcher) Size() int {
	total := reflectStaticSizeDocIDSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the number of identifiers the searcher was constructed with
// (len(ids) as passed to NewDocIDSearcher).
func (s *DocIDSearcher) Count() uint64 {
return uint64(s.count)
}
// Weight returns the weight reported by the underlying constant scorer.
func (s *DocIDSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm forwards the query normalization factor to the underlying scorer.
func (s *DocIDSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next returns the scored match for the next identifier produced by the
// reader, or (nil, nil) once the reader is exhausted. Reader errors are
// returned unchanged.
func (s *DocIDSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	id, err := s.reader.Next()
	if err != nil || id == nil {
		// either a failure (err set) or end of sequence (err nil)
		return nil, err
	}
	return s.scorer.Score(ctx, id), nil
}
// Advance asks the reader to advance to ID and returns the scored match for
// whatever identifier the reader yields, or (nil, nil) if it yields none.
// Reader errors are returned unchanged.
func (s *DocIDSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	id, err := s.reader.Advance(ID)
	if err != nil || id == nil {
		// either a failure (err set) or nothing left to return (err nil)
		return nil, err
	}
	return s.scorer.Score(ctx, id), nil
}
// Close closes the underlying identifier reader and returns its error.
func (s *DocIDSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0 for a DocIDSearcher.
func (s *DocIDSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1 for a DocIDSearcher.
func (s *DocIDSearcher) DocumentMatchPoolSize() int {
return 1
}
================================================
FILE: search/searcher/search_docid_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// testDocIDSearcher indexes one document per identifier in indexed, builds a
// DocIDSearcher over searched, and verifies that:
//   - Next yields exactly the identifiers in wanted, in order, then nil;
//   - Advance to each wanted identifier, and to its one-character prefix,
//     lands on that identifier;
//   - Advance past the end ("zzz") yields nil.
//
// Every identifier in wanted must be exactly two characters long.
func testDocIDSearcher(t *testing.T, indexed, searched, wanted []string) {
	analysisQueue := index.NewAnalysisQueue(1)
	idx, err := upsidedown.NewUpsideDownCouch(
		gtreap.Name,
		map[string]interface{}{
			"path": "",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Open()
	if err != nil {
		t.Fatal(err)
	}

	for _, id := range indexed {
		doc := document.NewDocument(id)
		doc.AddField(document.NewTextField("desc", []uint64{}, []byte("beer")))
		err = idx.Update(doc)
		if err != nil {
			t.Fatal(err)
		}
	}

	indexReader, err := idx.Reader()
	if err != nil {
		// nothing below can run without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	explainOff := search.SearcherOptions{Explain: false}

	searcher, err := NewDocIDSearcher(context.TODO(), indexReader, searched, 1.0, explainOff)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := searcher.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
	}

	// Check the sequence
	for pos, id := range wanted {
		m, err := searcher.Next(ctx)
		if err != nil {
			t.Fatal(err)
		}
		if m == nil {
			// report a clean failure instead of panicking on a nil match
			t.Fatalf("expected %v at position %v, got nil", id, pos)
		}
		if !index.IndexInternalID(id).Equals(m.IndexInternalID) {
			t.Fatalf("expected %v at position %v, got %v", id, pos, m.IndexInternalID)
		}
		ctx.DocumentMatchPool.Put(m)
	}
	m, err := searcher.Next(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if m != nil {
		t.Fatalf("expected nil past the end of the sequence, got %v", m.IndexInternalID)
	}
	ctx.DocumentMatchPool.Put(m)

	// Check seeking
	for _, id := range wanted {
		if len(id) != 2 {
			t.Fatalf("expected identifier must be 2 characters long, got %v", id)
		}
		// the one-character prefix sorts before id, so advancing to either
		// target must land on id
		before := id[:1]
		for _, target := range []string{before, id} {
			m, err := searcher.Advance(ctx, index.IndexInternalID(target))
			if err != nil {
				t.Fatal(err)
			}
			if m == nil || !m.IndexInternalID.Equals(index.IndexInternalID(id)) {
				t.Fatalf("advancing to %v returned %v instead of %v", target, m, id)
			}
			ctx.DocumentMatchPool.Put(m)
		}
	}

	// Seek after the end of the sequence
	after := "zzz"
	m, err = searcher.Advance(ctx, index.IndexInternalID(after))
	if err != nil {
		t.Fatal(err)
	}
	if m != nil {
		t.Fatalf("advancing past the end of the sequence should return nil, got %v", m)
	}
	ctx.DocumentMatchPool.Put(m)
}
// TestDocIDSearcherEmptySearchEmptyIndex searches for no identifiers in an
// empty index and expects no results.
func TestDocIDSearcherEmptySearchEmptyIndex(t *testing.T) {
testDocIDSearcher(t, nil, nil, nil)
}
// TestDocIDSearcherEmptyIndex searches for identifiers in an empty index and
// expects no results.
func TestDocIDSearcherEmptyIndex(t *testing.T) {
testDocIDSearcher(t, nil, []string{"aa", "bb"}, nil)
}
// TestDocIDSearcherEmptySearch searches for no identifiers in a populated
// index and expects no results.
func TestDocIDSearcherEmptySearch(t *testing.T) {
testDocIDSearcher(t, []string{"aa", "bb"}, nil, nil)
}
// TestDocIDSearcherValid exercises a search list that contains a missing
// identifier, out-of-order identifiers and a duplicate; only the indexed
// identifiers are expected back, each once and in sorted order.
func TestDocIDSearcherValid(t *testing.T) {
	indexed := []string{"aa", "bb", "cc"}
	// "ee" is not indexed, "bb" appears twice, and the order is scrambled
	searched := []string{"ee", "bb", "aa", "bb"}
	wanted := []string{"aa", "bb"}
	testDocIDSearcher(t, indexed, searched, wanted)
}
================================================
FILE: search/searcher/search_filter.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeFilteringSearcher holds the size in bytes of a
// FilteringSearcher value as reported by reflect; it is computed once at
// package init.
var reflectStaticSizeFilteringSearcher int

func init() {
	reflectStaticSizeFilteringSearcher = int(reflect.TypeOf(FilteringSearcher{}).Size())
}
// FilterFunc defines a function which can filter documents.
// Returning true means keep the document;
// returning false means do not keep the document.
// The search context is passed along with the candidate match.
type FilterFunc func(sctx *search.SearchContext, d *search.DocumentMatch) bool
// FilteringSearcher wraps any other searcher, but checks any Next/Advance
// call against the supplied FilterFunc
type FilteringSearcher struct {
// child is the wrapped searcher that produces candidate matches.
child search.Searcher
// accept decides whether a candidate from child is returned to the caller.
accept FilterFunc
}
// NewFilteringSearcher wraps s so that only matches accepted by filter are
// returned. The ctx argument is not used by this constructor.
func NewFilteringSearcher(ctx context.Context, s search.Searcher, filter FilterFunc) *FilteringSearcher {
	fs := new(FilteringSearcher)
	fs.child = s
	fs.accept = filter
	return fs
}
// Size returns the static size of the wrapper plus a pointer, plus the size
// reported by the wrapped searcher.
func (f *FilteringSearcher) Size() int {
	total := reflectStaticSizeFilteringSearcher + size.SizeOfPtr
	total += f.child.Size()
	return total
}
// Next returns the next match from the wrapped searcher that the filter
// accepts. Rejected matches are returned to the context's DocumentMatchPool.
// It returns (nil, nil) when the child is exhausted and (nil, err) as soon as
// the child reports an error.
func (f *FilteringSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	for {
		candidate, err := f.child.Next(ctx)
		if err != nil || candidate == nil {
			// failure (err set) or end of the child's results (err nil)
			return nil, err
		}
		if f.accept(ctx, candidate) {
			return candidate, nil
		}
		// rejected: recycle the match right away, it is no longer needed
		ctx.DocumentMatchPool.Put(candidate)
	}
}
// Advance moves the wrapped searcher to ID and returns the match it yields if
// the filter accepts it. If the filter rejects that match, the match is
// recycled and the search continues with Next.
func (f *FilteringSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	candidate, err := f.child.Advance(ctx, ID)
	switch {
	case err != nil:
		return nil, err
	case candidate == nil:
		return nil, nil
	case f.accept(ctx, candidate):
		return candidate, nil
	}
	// rejected: recycle the match right away, then fall back to scanning
	ctx.DocumentMatchPool.Put(candidate)
	return f.Next(ctx)
}
// Close closes the wrapped searcher and returns its error.
func (f *FilteringSearcher) Close() error {
return f.child.Close()
}
// Weight returns the weight of the wrapped searcher.
func (f *FilteringSearcher) Weight() float64 {
return f.child.Weight()
}
// SetQueryNorm forwards the query normalization factor to the wrapped searcher.
func (f *FilteringSearcher) SetQueryNorm(n float64) {
f.child.SetQueryNorm(n)
}
// Count returns the count reported by the wrapped searcher; the filter is not
// applied to this value.
func (f *FilteringSearcher) Count() uint64 {
return f.child.Count()
}
// Min returns the Min value of the wrapped searcher.
func (f *FilteringSearcher) Min() int {
return f.child.Min()
}
// DocumentMatchPoolSize returns the pool size required by the wrapped searcher.
func (f *FilteringSearcher) DocumentMatchPoolSize() int {
return f.child.DocumentMatchPoolSize()
}
================================================
FILE: search/searcher/search_fuzzy.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"strings"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// MaxFuzziness is the largest fuzziness NewFuzzySearcher accepts; requests
// with a larger value are rejected with an error.
var MaxFuzziness = 2
// AutoFuzzinessHighThreshold is the threshold for the term length
// (len(term), i.e. bytes) above which GetAutoFuzziness returns MaxFuzziness.
var AutoFuzzinessHighThreshold = 5
// AutoFuzzinessLowThreshold is the threshold for the term length
// (len(term), i.e. bytes) at or below which GetAutoFuzziness returns zero.
// For terms longer than AutoFuzzinessLowThreshold but no longer than
// AutoFuzzinessHighThreshold, GetAutoFuzziness returns
// MaxFuzziness - 1.
var AutoFuzzinessLowThreshold = 2
// NewFuzzySearcher returns a searcher matching documents whose field contains
// a term within the given edit distance (fuzziness) of term.
//
// fuzziness must be in [0, MaxFuzziness]; otherwise an error is returned.
// prefix is the number of leading characters (runes) of term that candidate
// terms must share exactly; a non-positive prefix disables the constraint.
//
// With fuzziness 0, or when the only candidate found is term itself (or there
// are none), a plain term searcher is returned. Otherwise a boosted multi-term
// searcher over all candidates is returned.
//
// If ctx carries a map under search.FuzzyMatchPhraseKey, the terms finally
// searched for are recorded in it under term. If ctx is non-nil, the bytes
// read from the dictionary are reported through reportIOStats and
// search.RecordSearchCost.
func NewFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term string,
	prefix, fuzziness int, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	if fuzziness > MaxFuzziness {
		return nil, fmt.Errorf("fuzziness exceeds max (%d)", MaxFuzziness)
	}

	if fuzziness < 0 {
		return nil, fmt.Errorf("invalid fuzziness, negative")
	}

	// recordMatches publishes the terms searched on behalf of term to the
	// phrase searcher, when the call was made from one (signalled by a map
	// stored in ctx under search.FuzzyMatchPhraseKey).
	recordMatches := func(matches []string) {
		if ctx == nil {
			return
		}
		if fuzzyTermMatches := ctx.Value(search.FuzzyMatchPhraseKey); fuzzyTermMatches != nil {
			fuzzyTermMatches.(map[string][]string)[term] = matches
		}
	}

	if fuzziness == 0 {
		// no fuzziness, just do a term search; fuzzy candidates are not
		// collected in that case, so the only candidate is the term itself
		recordMatches([]string{term})
		return NewTermSearcher(ctx, indexReader, term, field, boost, options)
	}

	// Take the first `prefix` runes of term. The range index is a byte
	// offset, so runes are counted separately and the term is cut at a rune
	// boundary; this never splits a multi-byte character.
	prefixTerm := ""
	if prefix > 0 {
		prefixTerm = term // term has at most `prefix` runes unless cut below
		runes := 0
		for byteOffset := range term {
			if runes == prefix {
				prefixTerm = term[:byteOffset]
				break
			}
			runes++
		}
	}

	found, err := findFuzzyCandidateTerms(ctx, indexReader, term, fuzziness,
		field, prefixTerm)
	if err != nil {
		return nil, err
	}

	var candidates []string
	var editDistances []uint8
	var dictBytesRead uint64
	if found != nil {
		candidates = found.candidates
		editDistances = found.editDistances
		dictBytesRead = found.bytesRead
	}

	if ctx != nil {
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	// check if the candidates are empty or have one term which is the term itself
	if len(candidates) == 0 || (len(candidates) == 1 && candidates[0] == term) {
		recordMatches([]string{term})
		return NewTermSearcher(ctx, indexReader, term, field, boost, options)
	}

	recordMatches(candidates)
	return NewMultiTermSearcherBoosted(ctx, indexReader, candidates, field,
		boost, editDistances, options, true)
}
// GetAutoFuzziness picks a fuzziness for term from its length in bytes:
// MaxFuzziness above AutoFuzzinessHighThreshold, MaxFuzziness - 1 above
// AutoFuzzinessLowThreshold, and 0 otherwise.
func GetAutoFuzziness(term string) int {
	switch n := len(term); {
	case n > AutoFuzzinessHighThreshold:
		return MaxFuzziness
	case n > AutoFuzzinessLowThreshold:
		return MaxFuzziness - 1
	default:
		return 0
	}
}
// NewAutoFuzzySearcher is NewFuzzySearcher with the fuzziness chosen by
// GetAutoFuzziness from the term itself.
func NewAutoFuzzySearcher(ctx context.Context, indexReader index.IndexReader, term string,
	prefix int, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	fuzziness := GetAutoFuzziness(term)
	return NewFuzzySearcher(ctx, indexReader, term, prefix, fuzziness, field, boost, options)
}
// fuzzyCandidates is the result of findFuzzyCandidateTerms.
type fuzzyCandidates struct {
// candidates holds the dictionary terms within the requested edit distance.
candidates []string
// editDistances holds the edit distance of each candidate; it is appended
// to in lockstep with candidates, so both slices share indices.
editDistances []uint8
// bytesRead is the value reported by the field dictionary's BytesRead.
bytesRead uint64
}
// reportIOStats hands bytesRead to the callback stored in ctx under
// search.SearchIOStatsCallbackKey, if there is one. Fuzzy and regexp-like
// queries load a dictionary, and that cost is accounted for through this
// callback. A nil ctx or a missing callback makes this a no-op.
func reportIOStats(ctx context.Context, bytesRead uint64) {
	if ctx == nil {
		return
	}
	callback := ctx.Value(search.SearchIOStatsCallbackKey)
	if callback == nil {
		return
	}
	report := callback.(search.SearchIOStatsCallbackFunc)
	report(bytesRead)
}
// findFuzzyCandidateTerms collects the terms of field that lie within
// fuzziness edits of term and start with prefixTerm.
//
// If indexReader implements index.IndexReaderFuzzy, the reader's fuzzy
// automaton dictionary is used; synonyms stored in ctx under
// search.FieldTermSynonymMapKey for this field are also tested against the
// automaton and added when they match. Otherwise the (optionally
// prefix-restricted) field dictionary is enumerated and each term is checked
// with a bounded Levenshtein distance.
//
// An error is returned when the dictionary cannot be opened, iterated or
// closed, or when the number of candidates trips tooManyClauses.
func findFuzzyCandidateTerms(ctx context.Context, indexReader index.IndexReader, term string,
	fuzziness int, field, prefixTerm string) (rv *fuzzyCandidates, err error) {
	rv = &fuzzyCandidates{
		candidates:    make([]string, 0),
		editDistances: make([]uint8, 0),
	}

	// in case of advanced reader implementations directly call
	// the levenshtein automaton based iterator to collect the
	// candidate terms
	if ir, ok := indexReader.(index.IndexReaderFuzzy); ok {
		seen := make(map[string]struct{})
		// addCandidate records a term once and enforces the clause limit.
		addCandidate := func(candidate string, editDistance uint8) error {
			if _, exists := seen[candidate]; !exists {
				seen[candidate] = struct{}{}
				rv.candidates = append(rv.candidates, candidate)
				rv.editDistances = append(rv.editDistances, editDistance)
				if tooManyClauses(len(rv.candidates)) {
					return tooManyClausesErr(field, len(rv.candidates))
				}
			}
			return nil
		}

		// Errors in this branch use their own variable names on purpose:
		// declaring `err` with := here would shadow the named result, and the
		// deferred Close below would then assign its error to the shadow and
		// lose it.
		fieldDict, automaton, dictErr := ir.FieldDictFuzzyAutomaton(field, term, fuzziness, prefixTerm)
		if dictErr != nil {
			return nil, dictErr
		}
		defer func() {
			if cerr := fieldDict.Close(); cerr != nil && err == nil {
				err = cerr
			}
		}()

		tfd, nextErr := fieldDict.Next()
		for nextErr == nil && tfd != nil {
			if addErr := addCandidate(tfd.Term, tfd.EditDistance); addErr != nil {
				return nil, addErr
			}
			tfd, nextErr = fieldDict.Next()
		}
		if nextErr != nil {
			return nil, nextErr
		}

		if ctx != nil {
			if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
				if ts, exists := fts[field]; exists {
					for synonym := range ts {
						if _, exists := seen[synonym]; exists {
							continue
						}
						if !strings.HasPrefix(synonym, prefixTerm) {
							continue
						}
						match, editDistance := automaton.MatchAndDistance(synonym)
						if match {
							if addErr := addCandidate(synonym, editDistance); addErr != nil {
								return nil, addErr
							}
						}
					}
				}
			}
		}
		rv.bytesRead = fieldDict.BytesRead()
		return rv, nil
	}

	var fieldDict index.FieldDict
	if len(prefixTerm) > 0 {
		fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
	} else {
		fieldDict, err = indexReader.FieldDict(field)
	}
	if err != nil {
		return nil, err
	}
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	// enumerate terms and check levenshtein distance
	var reuse []int // scratch slice handed back by the distance helper for reuse
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		var ld int
		var exceeded bool
		ld, exceeded, reuse = search.LevenshteinDistanceMaxReuseSlice(term, tfd.Term, fuzziness, reuse)
		if !exceeded && ld <= fuzziness {
			rv.candidates = append(rv.candidates, tfd.Term)
			rv.editDistances = append(rv.editDistances, uint8(ld))
			if tooManyClauses(len(rv.candidates)) {
				return nil, tooManyClausesErr(field, len(rv.candidates))
			}
		}
		tfd, err = fieldDict.Next()
	}

	rv.bytesRead = fieldDict.BytesRead()
	return rv, err
}
================================================
FILE: search/searcher/search_fuzzy_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestFuzzySearch runs several fuzzy searchers (different terms, fuzziness
// and prefix lengths) against the "desc" field of the shared twoDocIndex
// fixture and checks the returned document IDs and exact scores, in order.
func TestFuzzySearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can run without a reader, so stop immediately
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	explainTrue := search.SearcherOptions{Explain: true}

	fuzzySearcherbeet, err := NewFuzzySearcher(context.TODO(), twoDocIndexReader, "beet", 0, 1, "desc", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	fuzzySearcherdouches, err := NewFuzzySearcher(context.TODO(), twoDocIndexReader, "douches", 0, 2, "desc", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	fuzzySearcheraplee, err := NewFuzzySearcher(context.TODO(), twoDocIndexReader, "aplee", 0, 2, "desc", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	fuzzySearcherprefix, err := NewFuzzySearcher(context.TODO(), twoDocIndexReader, "water", 3, 2, "desc", 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: fuzzySearcherbeet,
			results: []*search.DocumentMatch{
				{
					IndexInternalID: index.IndexInternalID("1"),
					Score:           1.0,
				},
				{
					IndexInternalID: index.IndexInternalID("2"),
					Score:           0.5,
				},
				{
					IndexInternalID: index.IndexInternalID("3"),
					Score:           0.5,
				},
				{
					IndexInternalID: index.IndexInternalID("4"),
					Score:           0.9999999838027345,
				},
			},
		},
		{
			searcher: fuzzySearcherdouches,
			results:  []*search.DocumentMatch{},
		},
		{
			searcher: fuzzySearcheraplee,
			results: []*search.DocumentMatch{
				{
					IndexInternalID: index.IndexInternalID("3"),
					Score:           0.9581453659370776,
				},
			},
		},
		{
			searcher: fuzzySearcherprefix,
			results: []*search.DocumentMatch{
				{
					IndexInternalID: index.IndexInternalID("5"),
					Score:           1.916290731874155,
				},
			},
		},
	}

	for testIndex, test := range tests {
		// Pass the searcher as an argument so every deferred call closes its
		// own searcher, even on Go versions (< 1.22) where the loop variable
		// is shared between iterations.
		defer func(s search.Searcher) {
			if err := s.Close(); err != nil {
				t.Fatal(err)
			}
		}(test.searcher)

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := test.searcher.Next(ctx)
		i := 0
		for err == nil && next != nil {
			// extra, unexpected results are only counted; the length check
			// after the loop reports them
			if i < len(test.results) {
				if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
				}
				if next.Score != test.results[i].Score {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			ctx.DocumentMatchPool.Put(next)
			next, err = test.searcher.Next(ctx)
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
// TestFuzzySearchLimitErrors checks that NewFuzzySearcher rejects a fuzziness
// above the maximum and a negative fuzziness. A nil index reader is passed,
// since both cases are expected to fail before the reader is used.
func TestFuzzySearchLimitErrors(t *testing.T) {
	opts := search.SearcherOptions{Explain: true}

	cases := []struct {
		fuzziness int
		failure   string
	}{
		{fuzziness: 3, failure: "`fuzziness exceeds max (2)` error expected"},
		{fuzziness: -1, failure: "`invalid fuzziness, negative` error expected"},
	}
	for _, tc := range cases {
		if _, err := NewFuzzySearcher(context.TODO(), nil, "water", 3, tc.fuzziness, "desc", 1.0, opts); err == nil {
			t.Fatal(tc.failure)
		}
	}
}
================================================
FILE: search/searcher/search_geoboundingbox.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// filterFunc reports whether the given key passes the filter; in this file it
// is used as the "is this term indexed" predicate for geo range computation.
type filterFunc func(key []byte) bool
var (
// GeoBitsShift1 is geo.GeoBits shifted left by one, i.e. twice geo.GeoBits.
GeoBitsShift1 = geo.GeoBits << 1
// GeoBitsShift1Minus1 is GeoBitsShift1 - 1; it is the initial shift passed
// to ComputeGeoRange by NewGeoBoundingBoxSearcher.
GeoBitsShift1Minus1 = GeoBitsShift1 - 1
)
// NewGeoBoundingBoxSearcher returns a searcher matching documents whose geo
// point in field lies inside the rectangle given by (minLon, minLat) and
// (maxLon, maxLat).
//
// If indexReader implements index.SpatialIndexPlugin and provides the "s2"
// analyzer plugin, the query tokens from that plugin drive a multi-term
// searcher whose hits are filtered against the rectangle using doc values.
//
// Otherwise the terms are computed with ComputeGeoRange: terms on the
// rectangle boundary are searched and filtered against the rectangle, terms
// strictly inside are searched as-is, and the two searchers are combined in a
// disjunction. If no terms are produced, a match-none searcher is returned.
//
// Searchers opened by this function are closed again on every error path.
func NewGeoBoundingBoxSearcher(ctx context.Context, indexReader index.IndexReader, minLon, minLat,
	maxLon, maxLat float64, field string, boost float64,
	options search.SearcherOptions, checkBoundaries bool) (
	search.Searcher, error,
) {
	if tp, ok := indexReader.(index.SpatialIndexPlugin); ok {
		sp, err := tp.GetSpatialAnalyzerPlugin("s2")
		if err == nil {
			terms := sp.GetQueryTokens(geo.NewBoundedRectangle(minLat,
				minLon, maxLat, maxLon))
			boxSearcher, err := NewMultiTermSearcher(ctx, indexReader,
				terms, field, boost, options, false)
			if err != nil {
				return nil, err
			}

			dvReader, err := indexReader.DocValueReader([]string{field})
			if err != nil {
				// do not leak the searcher opened just above
				_ = boxSearcher.Close()
				return nil, err
			}

			return NewFilteringSearcher(ctx, boxSearcher, buildRectFilter(ctx, dvReader,
				minLon, minLat, maxLon, maxLat)), nil
		}
	}

	// indexes without the spatial plugin override would continue here.

	// track list of opened searchers, for cleanup on early exit
	var openedSearchers []search.Searcher
	cleanupOpenedSearchers := func() {
		for _, s := range openedSearchers {
			_ = s.Close()
		}
	}

	// do math to produce list of terms needed for this search
	onBoundaryTerms, notOnBoundaryTerms, err := ComputeGeoRange(context.TODO(), 0, GeoBitsShift1Minus1,
		minLon, minLat, maxLon, maxLat, checkBoundaries, indexReader, field)
	if err != nil {
		return nil, err
	}

	var onBoundarySearcher search.Searcher
	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		return nil, err
	}

	if len(onBoundaryTerms) > 0 {
		rawOnBoundarySearcher, err := NewMultiTermSearcherBytes(ctx, indexReader,
			onBoundaryTerms, field, boost, options, false)
		if err != nil {
			return nil, err
		}
		// add filter to check points near the boundary
		onBoundarySearcher = NewFilteringSearcher(ctx, rawOnBoundarySearcher,
			buildRectFilter(ctx, dvReader, minLon, minLat, maxLon, maxLat))
		openedSearchers = append(openedSearchers, onBoundarySearcher)
	}

	var notOnBoundarySearcher search.Searcher
	if len(notOnBoundaryTerms) > 0 {
		var err error
		notOnBoundarySearcher, err = NewMultiTermSearcherBytes(ctx, indexReader,
			notOnBoundaryTerms, field, boost, options, false)
		if err != nil {
			cleanupOpenedSearchers()
			return nil, err
		}
		openedSearchers = append(openedSearchers, notOnBoundarySearcher)
	}

	switch {
	case onBoundarySearcher != nil && notOnBoundarySearcher != nil:
		rv, err := NewDisjunctionSearcher(ctx, indexReader,
			[]search.Searcher{
				onBoundarySearcher,
				notOnBoundarySearcher,
			},
			0, options)
		if err != nil {
			cleanupOpenedSearchers()
			return nil, err
		}
		return rv, nil
	case onBoundarySearcher != nil:
		return onBoundarySearcher, nil
	case notOnBoundarySearcher != nil:
		return notOnBoundarySearcher, nil
	}

	return NewMatchNoneSearcher(indexReader)
}
var (
	// geoMaxShift is four geo precision steps (document.GeoPrecisionStep * 4).
	geoMaxShift = document.GeoPrecisionStep * 4
	// geoDetailLevel is the subdivision level at which computeGeoRange stops
	// recursing: cells at this level that intersect the search rectangle are
	// emitted as terms, and only shallower levels are subdivided further.
	geoDetailLevel = ((geo.GeoBits << 1) - geoMaxShift) / 2
)
// closeFunc releases resources held by a helper. buildIsIndexedFunc returns
// one (possibly nil) and ComputeGeoRange defers it when it is non-nil.
type closeFunc func() error
// ComputeGeoRange computes the prefix-coded terms needed to search the
// rectangle (sminLon, sminLat)-(smaxLon, smaxLat), starting the recursive
// subdivision at the given term and shift.
//
// The terms are returned in two groups. onBoundary holds the terms of cells
// that were emitted without being recognised as fully within the rectangle,
// and is only populated when checkBoundaries is true. notOnBoundary holds
// every other emitted term. Terms that are not indexed for field (as far as
// indexReader can tell) are left out; with a nil indexReader every term is
// treated as indexed.
//
// An error is returned when the indexed-term lookup cannot be set up, or
// when releasing it fails.
func ComputeGeoRange(ctx context.Context, term uint64, shift uint,
	sminLon, sminLat, smaxLon, smaxLat float64, checkBoundaries bool,
	indexReader index.IndexReader, field string) (
	onBoundary [][]byte, notOnBoundary [][]byte, err error,
) {
	isIndexed, closeF, err := buildIsIndexedFunc(ctx, indexReader, field)
	if err != nil {
		// a failed setup leaves isIndexed nil; bail out here rather than
		// letting the computation below call a nil function
		return nil, nil, err
	}
	if closeF != nil {
		defer func() {
			// surface a close failure through the named result
			cerr := closeF()
			if cerr != nil {
				err = cerr
			}
		}()
	}
	grc := &geoRangeCompute{
		preallocBytesLen: 32,
		preallocBytes:    make([]byte, 32),
		sminLon:          sminLon,
		sminLat:          sminLat,
		smaxLon:          smaxLon,
		smaxLat:          smaxLat,
		checkBoundaries:  checkBoundaries,
		isIndexed:        isIndexed,
	}
	grc.computeGeoRange(term, shift)
	return grc.onBoundary, grc.notOnBoundary, nil
}
// buildIsIndexedFunc returns a predicate telling whether a term is indexed
// for field, together with an optional function that releases whatever the
// predicate holds open (nil when there is nothing to release).
//
// Three strategies are used, in order of preference:
//   - the reader implements index.IndexReaderContains: terms are looked up in
//     the field dictionary returned by FieldDictContains;
//   - any other non-nil reader: a term field reader is opened per term and
//     the term counts as indexed when its Count is non-zero;
//   - a nil reader: every term is reported as indexed.
//
// Lookup errors inside the predicate are reported as "not indexed".
func buildIsIndexedFunc(ctx context.Context, indexReader index.IndexReader, field string) (isIndexed filterFunc, closeF closeFunc, err error) {
	if containsReader, ok := indexReader.(index.IndexReaderContains); ok {
		dict, dictErr := containsReader.FieldDictContains(field)
		if dictErr != nil {
			return nil, nil, dictErr
		}
		isIndexed = func(term []byte) bool {
			present, lookupErr := dict.Contains(term)
			return lookupErr == nil && present
		}
		closeF = func() error {
			// the dictionary is only closed when it also is a FieldDict
			closer, closable := dict.(index.FieldDict)
			if !closable {
				return nil
			}
			return closer.Close()
		}
		return isIndexed, closeF, nil
	}

	if indexReader == nil {
		return func([]byte) bool { return true }, nil, nil
	}

	isIndexed = func(term []byte) bool {
		tfr, tfrErr := indexReader.TermFieldReader(ctx, term, field, false, false, false)
		if tfrErr != nil || tfr == nil {
			return false
		}
		hasDocs := tfr.Count() != 0
		_ = tfr.Close()
		return hasDocs
	}
	return isIndexed, nil, nil
}
// buildRectFilter returns a FilterFunc that accepts a document match when a
// geo point read from its doc values lies inside the bounding box
// (minLon, minLat)-(maxLon, maxLat).
//
// Only doc-value terms with shift 0 are decoded, and decoding stops after the
// first such term of a document. Bytes read from the doc values are reported
// through reportIOStats and search.RecordSearchCost when a point was found.
func buildRectFilter(ctx context.Context, dvReader index.DocValueReader,
	minLon, minLat, maxLon, maxLat float64,
) FilterFunc {
	// scratch state shared by every invocation of the returned filter
	var (
		pointLons, pointLats []float64
		sawPoint             bool
	)

	collectPoint := func(_ string, term []byte) {
		if sawPoint {
			// a point was already decoded for this document
			return
		}
		coded := numeric.PrefixCoded(term)
		// full-precision values are the ones stored with shift 0
		if shift, err := coded.Shift(); err != nil || shift != 0 {
			return
		}
		hash, err := coded.Int64()
		if err != nil {
			return
		}
		pointLons = append(pointLons, geo.MortonUnhashLon(uint64(hash)))
		pointLats = append(pointLats, geo.MortonUnhashLat(uint64(hash)))
		sawPoint = true
	}

	return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
		pointLons, pointLats = pointLons[:0], pointLats[:0]
		sawPoint = false

		if err := dvReader.VisitDocValues(d.IndexInternalID, collectPoint); err != nil || !sawPoint {
			return false
		}

		if n := dvReader.BytesRead(); n > 0 {
			reportIOStats(ctx, n)
			search.RecordSearchCost(ctx, search.AddM, n)
		}

		for k, lon := range pointLons {
			if geo.BoundingBoxContains(lon, pointLats[k],
				minLon, minLat, maxLon, maxLat) {
				return true
			}
		}
		return false
	}
}
// geoRangeCompute carries the state of one ComputeGeoRange invocation.
type geoRangeCompute struct {
	// preallocBytes is the scratch buffer terms are encoded into and
	// preallocBytesLen the size used for its most recent allocation.
	preallocBytesLen int
	preallocBytes    []byte

	// corners of the search rectangle
	sminLon float64
	sminLat float64
	smaxLon float64
	smaxLat float64

	// checkBoundaries selects whether terms of cells that are not fully
	// within the rectangle are collected separately in onBoundary.
	checkBoundaries bool

	// collected terms
	onBoundary    [][]byte
	notOnBoundary [][]byte

	// isIndexed reports whether a candidate term should be kept.
	isIndexed func(term []byte) bool
}
// makePrefixCoded encodes in at the given shift using the shared scratch
// buffer. When the buffer is exhausted a fresh one of twice the previous
// size is allocated first. The error from the encoder is discarded.
func (grc *geoRangeCompute) makePrefixCoded(in int64, shift uint) (rv numeric.PrefixCoded) {
	if len(grc.preallocBytes) == 0 {
		grc.preallocBytesLen *= 2
		grc.preallocBytes = make([]byte, grc.preallocBytesLen)
	}
	// presumably the encoder hands back the unused remainder of the buffer
	// as its second result; it is stored for the next call
	coded, remaining, _ := numeric.NewPrefixCodedInt64Prealloc(in, shift, grc.preallocBytes)
	grc.preallocBytes = remaining
	return coded
}
func (grc *geoRangeCompute) computeGeoRange(term uint64, shift uint) {
split := term | uint64(0x1)<> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat)) {
codedTerm := grc.makePrefixCoded(int64(start), res)
if grc.isIndexed(codedTerm) {
if !within && grc.checkBoundaries {
grc.onBoundary = append(grc.onBoundary, codedTerm)
} else {
grc.notOnBoundary = append(grc.notOnBoundary, codedTerm)
}
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
grc.sminLon, grc.sminLat, grc.smaxLon, grc.smaxLat) {
grc.computeGeoRange(start, res-1)
}
}
================================================
FILE: search/searcher/search_geoboundingbox_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoBoundingBox runs bounding box searches against the index built by
// setupGeo and compares the matched document IDs with the expected ones.
func TestGeoBoundingBox(t *testing.T) {
	tests := []struct {
		minLon float64
		minLat float64
		maxLon float64
		maxLat float64
		field  string
		want   []string
	}{
		{10.001, 10.001, 20.002, 20.002, "loc", nil},
		{0.001, 0.001, 0.002, 0.002, "loc", []string{"a"}},
		{0.001, 0.001, 1.002, 1.002, "loc", []string{"a", "b"}},
		{0.001, 0.001, 9.002, 9.002, "loc", []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}},
		// same upper-left, bottom-right point
		{25, 25, 25, 25, "loc", nil},
		// box that would return points, but points reversed
		{0.002, 0.002, 0.001, 0.001, "loc", nil},
	}
	i := setupGeo(t)
	indexReader, err := i.Reader()
	if err != nil {
		// nothing below can run without a reader; stop instead of
		// dereferencing a nil one
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	for _, test := range tests {
		got, err := testGeoBoundingBoxSearch(indexReader, test.minLon, test.minLat, test.maxLon, test.maxLat, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected %v, got %v for %f %f %f %f %s", test.want, got, test.minLon, test.minLat, test.maxLon, test.maxLat, test.field)
		}
	}
}
// testGeoBoundingBoxSearch runs a bounding box search (with boundary
// checking enabled) over field and returns the internal IDs of all matches
// in the order the searcher produced them. The searcher is closed before
// returning; a close failure is reported when no earlier error occurred.
func testGeoBoundingBoxSearch(i index.IndexReader, minLon, minLat, maxLon, maxLat float64, field string) (rv []string, err error) {
	gbs, err := NewGeoBoundingBoxSearcher(context.TODO(), i, minLon, minLat, maxLon, maxLat, field, 1.0, search.SearcherOptions{}, true)
	if err != nil {
		return nil, err
	}
	defer func() {
		// release the readers held by the searcher
		if cerr := gbs.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()
	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}
	docMatch, err := gbs.Next(ctx)
	for docMatch != nil && err == nil {
		rv = append(rv, string(docMatch.IndexInternalID))
		docMatch, err = gbs.Next(ctx)
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
// setupGeo opens an upsidedown index backed by the gtreap store and indexes
// ten documents "a" through "j", each with a single geo point in field "loc".
// Any failure aborts the test.
func setupGeo(t *testing.T) index.Index {
	idx, err := upsidedown.NewUpsideDownCouch(
		gtreap.Name,
		map[string]interface{}{
			"path": "",
		},
		index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}

	points := []struct {
		id       string
		lon, lat float64
	}{
		{"a", 0.0015, 0.0015},
		{"b", 1.0015, 1.0015},
		{"c", 2.0015, 2.0015},
		{"d", 3.0015, 3.0015},
		{"e", 4.0015, 4.0015},
		{"f", 5.0015, 5.0015},
		{"g", 6.0015, 6.0015},
		{"h", 7.0015, 7.0015},
		{"i", 8.0015, 8.0015},
		{"j", 9.0015, 9.0015},
	}
	for _, p := range points {
		doc := document.NewDocument(p.id)
		doc.AddField(document.NewGeoPointField("loc", []uint64{}, p.lon, p.lat))
		if err = idx.Update(doc); err != nil {
			t.Fatal(err)
		}
	}
	return idx
}
// TestComputeGeoRange checks the number of terms ComputeGeoRange produces
// for square boxes centred on the origin, and that the terms are identical
// to those of the reference implementation origComputeGeoRange.
func TestComputeGeoRange(t *testing.T) {
	tests := []struct {
		degs        float64
		onBoundary  int
		offBoundary int
		err         string
	}{
		{0.01, 4, 0, ""},
		{0.1, 56, 144, ""},
		{100.0, 32768, 258560, ""},
	}
	for testi, test := range tests {
		lo, hi := -1.0*test.degs, test.degs

		gotOn, gotOff, err := ComputeGeoRange(context.TODO(), 0, GeoBitsShift1Minus1,
			lo, lo, hi, hi, true, nil, "")
		gotErr, wantErr := err != nil, test.err != ""
		if gotErr != wantErr {
			t.Errorf("test: %+v, err: %v", test, err)
		}
		if n := len(gotOn); n != test.onBoundary {
			t.Errorf("test: %+v, onBoundaryRes: %v", test, n)
		}
		if n := len(gotOff); n != test.offBoundary {
			t.Errorf("test: %+v, offBoundaryRes: %v", test, n)
		}

		refOn, refOff := origComputeGeoRange(0, GeoBitsShift1Minus1,
			lo, lo, hi, hi, true)
		if !reflect.DeepEqual(gotOn, refOn) {
			t.Errorf("testi: %d, test: %+v, onBoundaryRes != onBROrig,\n onBoundaryRes:%v,\n onBROrig: %v",
				testi, test, gotOn, refOn)
		}
		if !reflect.DeepEqual(gotOff, refOff) {
			t.Errorf("testi: %d, test: %+v, offBoundaryRes, offBROrig,\n offBoundaryRes: %v,\n offBROrig: %v",
				testi, test, gotOff, refOff)
		}
	}
}
// --------------------------------------------------------------------
// BenchmarkComputeGeoRangePt01 benchmarks ComputeGeoRange for the box
// (-0.01, -0.01)-(0.01, 0.01), expecting 4 boundary terms and no others.
func BenchmarkComputeGeoRangePt01(b *testing.B) {
	const (
		wantOnBoundary  = 4
		wantOffBoundary = 0
	)
	benchmarkComputeGeoRange(b, -0.01, -0.01, 0.01, 0.01, wantOnBoundary, wantOffBoundary)
}
// BenchmarkComputeGeoRangePt1 benchmarks ComputeGeoRange for the box
// (-0.1, -0.1)-(0.1, 0.1), expecting 56 boundary terms and 144 others.
func BenchmarkComputeGeoRangePt1(b *testing.B) {
	const (
		wantOnBoundary  = 56
		wantOffBoundary = 144
	)
	benchmarkComputeGeoRange(b, -0.1, -0.1, 0.1, 0.1, wantOnBoundary, wantOffBoundary)
}
// BenchmarkComputeGeoRange10 benchmarks ComputeGeoRange for the box
// (-10, -10)-(10, 10), expecting 5464 boundary terms and 53704 others.
func BenchmarkComputeGeoRange10(b *testing.B) {
	const (
		wantOnBoundary  = 5464
		wantOffBoundary = 53704
	)
	benchmarkComputeGeoRange(b, -10.0, -10.0, 10.0, 10.0, wantOnBoundary, wantOffBoundary)
}
// BenchmarkComputeGeoRange100 benchmarks ComputeGeoRange for the box
// (-100, -100)-(100, 100), expecting 32768 boundary terms and 258560 others.
func BenchmarkComputeGeoRange100(b *testing.B) {
	const (
		wantOnBoundary  = 32768
		wantOffBoundary = 258560
	)
	benchmarkComputeGeoRange(b, -100.0, -100.0, 100.0, 100.0, wantOnBoundary, wantOffBoundary)
}
// --------------------------------------------------------------------
// benchmarkComputeGeoRange repeatedly computes the geo range for the given
// box with boundary checking enabled and no index reader, failing the
// benchmark when an error occurs or the term counts differ from onBoundary
// and offBoundary.
func benchmarkComputeGeoRange(b *testing.B,
	minLon, minLat, maxLon, maxLat float64, onBoundary, offBoundary int,
) {
	const checkBoundaries = true
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		gotOn, gotOff, err := ComputeGeoRange(context.TODO(), 0, GeoBitsShift1Minus1, minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "")
		if err != nil {
			b.Fatalf("expected no err")
		}
		if len(gotOn) == onBoundary && len(gotOff) == offBoundary {
			continue
		}
		b.Fatalf("boundaries not matching")
	}
}
// --------------------------------------------------------------------
// original, non-optimized implementation of ComputeGeoRange
func origComputeGeoRange(term uint64, shift uint,
sminLon, sminLat, smaxLon, smaxLat float64,
checkBoundaries bool) (
onBoundary [][]byte, notOnBoundary [][]byte,
) {
split := term | uint64(0x1)<> 1
within := res%document.GeoPrecisionStep == 0 &&
geo.RectWithin(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)
if within || (level == geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat)) {
if !within && checkBoundaries {
return [][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}, nil
}
return nil,
[][]byte{
numeric.MustNewPrefixCodedInt64(int64(start), res),
}
} else if level < geoDetailLevel &&
geo.RectIntersects(minLon, minLat, maxLon, maxLat,
sminLon, sminLat, smaxLon, smaxLat) {
return origComputeGeoRange(start, res-1, sminLon, sminLat, smaxLon, smaxLat,
checkBoundaries)
}
return nil, nil
}
================================================
FILE: search/searcher/search_geopointdistance.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewGeoPointDistanceSearcher returns a searcher matching documents whose
// geo point in field lies within dist of (centerLon, centerLat).
//
// Candidates come from the "s2" spatial analyzer plugin when the index
// reader provides one, and otherwise from a bounding box searcher over the
// rectangle enclosing the circle. The candidates are then wrapped in a
// filtering searcher that checks the actual distance.
func NewGeoPointDistanceSearcher(ctx context.Context, indexReader index.IndexReader, centerLon,
	centerLat, dist float64, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	var rectSearcher search.Searcher
	if tp, ok := indexReader.(index.SpatialIndexPlugin); ok {
		sp, err := tp.GetSpatialAnalyzerPlugin("s2")
		if err == nil {
			terms := sp.GetQueryTokens(geo.NewPointDistance(centerLat,
				centerLon, dist))
			rectSearcher, err = NewMultiTermSearcher(ctx, indexReader, terms,
				field, boost, options, false)
			if err != nil {
				return nil, err
			}
		}
	}
	// indexes without the spatial plugin override would get
	// initialized here.
	if rectSearcher == nil {
		// compute bounding box containing the circle
		topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
			geo.RectFromPointDistance(centerLon, centerLat, dist)
		if err != nil {
			return nil, err
		}
		// build a searcher for the box
		rectSearcher, err = boxSearcher(ctx, indexReader,
			topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
			field, boost, options, false)
		if err != nil {
			return nil, err
		}
	}
	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		// the candidate searcher is already open and would otherwise leak;
		// the close error is dropped in favour of the original failure
		_ = rectSearcher.Close()
		return nil, err
	}
	// wrap it in a filtering searcher which checks the actual distance
	return NewFilteringSearcher(ctx, rectSearcher,
		buildDistFilter(ctx, dvReader, centerLon, centerLat, dist)), nil
}
// boxSearcher builds a searcher for the bounding box described by its
// top-left and bottom-right corners. A box whose bottom-right longitude is
// smaller than its top-left longitude crosses the date line; it is split at
// +/-180 into two boxes that are joined through a disjunction searcher.
// Searchers that were already opened are closed when a later step fails.
func boxSearcher(ctx context.Context, indexReader index.IndexReader,
	topLeftLon, topLeftLat, bottomRightLon, bottomRightLat float64,
	field string, boost float64, options search.SearcherOptions, checkBoundaries bool) (
	search.Searcher, error) {
	crossesDateLine := bottomRightLon < topLeftLon
	if !crossesDateLine {
		// a single geoboundingbox searcher covers the whole box
		single, err := NewGeoBoundingBoxSearcher(ctx, indexReader,
			topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, field, boost,
			options, checkBoundaries)
		if err != nil {
			return nil, err
		}
		return single, nil
	}

	// part from -180 up to the bottom-right longitude
	west, err := NewGeoBoundingBoxSearcher(ctx, indexReader,
		-180, bottomRightLat, bottomRightLon, topLeftLat,
		field, boost, options, checkBoundaries)
	if err != nil {
		return nil, err
	}

	// part from the top-left longitude up to 180
	east, err := NewGeoBoundingBoxSearcher(ctx, indexReader,
		topLeftLon, bottomRightLat, 180, topLeftLat, field, boost, options,
		checkBoundaries)
	if err != nil {
		_ = west.Close()
		return nil, err
	}

	joined, err := NewDisjunctionSearcher(ctx, indexReader,
		[]search.Searcher{west, east}, 0, options)
	if err != nil {
		_ = west.Close()
		_ = east.Close()
		return nil, err
	}
	return joined, nil
}
// buildDistFilter returns a FilterFunc that accepts a document match when a
// geo point read from its doc values has a haversine distance to
// (centerLon, centerLat) of at most maxDist/1000.
//
// Only doc-value terms with shift 0 are decoded, and decoding stops after the
// first such term of a document. Bytes read from the doc values are reported
// through reportIOStats and search.RecordSearchCost when a point was found.
func buildDistFilter(ctx context.Context, dvReader index.DocValueReader,
	centerLon, centerLat, maxDist float64) FilterFunc {
	// scratch state shared by every invocation of the returned filter
	var (
		pointLons, pointLats []float64
		sawPoint             bool
	)
	// maxDist is divided by 1000 before being compared with geo.Haversin's
	// result
	limit := maxDist / 1000

	collectPoint := func(_ string, term []byte) {
		if sawPoint {
			// a point was already decoded for this document
			return
		}
		coded := numeric.PrefixCoded(term)
		// full-precision values are the ones stored with shift 0
		if shift, err := coded.Shift(); err != nil || shift != 0 {
			return
		}
		hash, err := coded.Int64()
		if err != nil {
			return
		}
		pointLons = append(pointLons, geo.MortonUnhashLon(uint64(hash)))
		pointLats = append(pointLats, geo.MortonUnhashLat(uint64(hash)))
		sawPoint = true
	}

	return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
		pointLons, pointLats = pointLons[:0], pointLats[:0]
		sawPoint = false

		if err := dvReader.VisitDocValues(d.IndexInternalID, collectPoint); err != nil || !sawPoint {
			return false
		}

		if n := dvReader.BytesRead(); n > 0 {
			reportIOStats(ctx, n)
			search.RecordSearchCost(ctx, search.AddM, n)
		}

		for k, lon := range pointLons {
			if geo.Haversin(lon, pointLats[k], centerLon, centerLat) <= limit {
				return true
			}
		}
		return false
	}
}
================================================
FILE: search/searcher/search_geopointdistance_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoPointDistanceSearcher runs point-distance searches around the
// origin against the index built by setupGeo and compares the matched
// document IDs with the expected ones.
func TestGeoPointDistanceSearcher(t *testing.T) {
	tests := []struct {
		centerLon float64
		centerLat float64
		dist      float64
		field     string
		want      []string
	}{
		// approx 110567m per degree at equator
		{0.0, 0.0, 0, "loc", nil},
		{0.0, 0.0, 110567, "loc", []string{"a"}},
		{0.0, 0.0, 2 * 110567, "loc", []string{"a", "b"}},
		// stretching our approximation here
		{0.0, 0.0, 15 * 110567, "loc", []string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"}},
	}
	i := setupGeo(t)
	indexReader, err := i.Reader()
	if err != nil {
		// nothing below can run without a reader; stop instead of
		// dereferencing a nil one
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	for _, test := range tests {
		got, err := testGeoPointDistanceSearch(indexReader, test.centerLon, test.centerLat, test.dist, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected %v, got %v for %f %f %f %s", test.want, got, test.centerLon, test.centerLat, test.dist, test.field)
		}
	}
}
// testGeoPointDistanceSearch runs a point-distance search over field and
// returns the internal IDs of all matches in the order the searcher produced
// them. The searcher is closed before returning; a close failure is reported
// when no earlier error occurred.
func testGeoPointDistanceSearch(i index.IndexReader, centerLon, centerLat, dist float64, field string) (rv []string, err error) {
	gds, err := NewGeoPointDistanceSearcher(context.TODO(), i, centerLon, centerLat, dist, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	defer func() {
		// release the readers held by the searcher
		if cerr := gds.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()
	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gds.DocumentMatchPoolSize(), 0),
	}
	docMatch, err := gds.Next(ctx)
	for docMatch != nil && err == nil {
		rv = append(rv, string(docMatch.IndexInternalID))
		docMatch, err = gds.Next(ctx)
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
func TestGeoPointDistanceCompare(t *testing.T) {
tests := []struct {
docLat, docLon float64
centerLat, centerLon float64
distance string
}{
// Data points originally from MB-33454.
{
docLat: 33.718,
docLon: -116.8293,
centerLat: 39.59000587,
centerLon: -119.22998428,
distance: "10000mi",
},
{
docLat: 41.1305,
docLon: -121.6587,
centerLat: 61.28,
centerLon: -149.34,
distance: "10000mi",
},
}
for testi, test := range tests {
// compares the results from ComputeGeoRange with original, non-optimized version
compare := func(desc string,
minLon, minLat, maxLon, maxLat float64, checkBoundaries bool,
) {
// do math to produce list of terms needed for this search
onBoundaryRes, offBoundaryRes, err := ComputeGeoRange(context.TODO(), 0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries, nil, "")
if err != nil {
t.Fatal(err)
}
onBROrig, offBROrig := origComputeGeoRange(0, GeoBitsShift1Minus1,
minLon, minLat, maxLon, maxLat, checkBoundaries)
if !reflect.DeepEqual(onBoundaryRes, onBROrig) {
t.Fatalf("testi: %d, test: %+v, desc: %s, onBoundaryRes != onBROrig,\n onBoundaryRes:%v,\n onBROrig: %v",
testi, test, desc, onBoundaryRes, onBROrig)
}
if !reflect.DeepEqual(offBoundaryRes, offBROrig) {
t.Fatalf("testi: %d, test: %+v, desc: %s, offBoundaryRes, offBROrig,\n offBoundaryRes: %v,\n offBROrig: %v",
testi, test, desc, offBoundaryRes, offBROrig)
}
}
// follow the general approach of the GeoPointDistanceSearcher...
dist, err := geo.ParseDistance(test.distance)
if err != nil {
t.Fatal(err)
}
topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err := geo.RectFromPointDistance(test.centerLon, test.centerLat, dist)
if err != nil {
t.Fatal(err)
}
if bottomRightLon < topLeftLon {
// crosses date line, rewrite as two parts
compare("-180/f", -180, bottomRightLat, bottomRightLon, topLeftLat, false)
compare("-180/t", -180, bottomRightLat, bottomRightLon, topLeftLat, true)
compare("180/f", topLeftLon, bottomRightLat, 180, topLeftLat, false)
compare("180/t", topLeftLon, bottomRightLat, 180, topLeftLat, true)
} else {
compare("reg/f", topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, false)
compare("reg/t", topLeftLon, bottomRightLat, bottomRightLon, topLeftLat, true)
}
}
}
================================================
FILE: search/searcher/search_geopolygon.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"math"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewGeoBoundedPolygonSearcher returns a searcher matching documents whose
// geo point in field lies inside the polygon described by coordinates. At
// least three coordinates are required.
//
// Candidates come from the "s2" spatial analyzer plugin when the index
// reader provides one, and otherwise from a bounding box searcher over the
// rectangle enclosing the polygon. The candidates are then wrapped in a
// filtering searcher that performs the point-in-polygon check.
func NewGeoBoundedPolygonSearcher(ctx context.Context, indexReader index.IndexReader,
	coordinates []geo.Point, field string, boost float64,
	options search.SearcherOptions) (search.Searcher, error) {
	if len(coordinates) < 3 {
		return nil, fmt.Errorf("Too few points specified for the polygon boundary")
	}
	var rectSearcher search.Searcher
	if sr, ok := indexReader.(index.SpatialIndexPlugin); ok {
		tp, err := sr.GetSpatialAnalyzerPlugin("s2")
		if err == nil {
			terms := tp.GetQueryTokens(geo.NewBoundedPolygon(coordinates))
			rectSearcher, err = NewMultiTermSearcher(ctx, indexReader, terms,
				field, boost, options, false)
			if err != nil {
				return nil, err
			}
		}
	}
	// indexes without the spatial plugin override would get
	// initialized here.
	if rectSearcher == nil {
		// compute the bounding box enclosing the polygon
		topLeftLon, topLeftLat, bottomRightLon, bottomRightLat, err :=
			geo.BoundingRectangleForPolygon(coordinates)
		if err != nil {
			return nil, err
		}
		// build a searcher for the bounding box on the polygon
		rectSearcher, err = boxSearcher(ctx, indexReader,
			topLeftLon, topLeftLat, bottomRightLon, bottomRightLat,
			field, boost, options, true)
		if err != nil {
			return nil, err
		}
	}
	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		// the candidate searcher is already open and would otherwise leak;
		// the close error is dropped in favour of the original failure
		_ = rectSearcher.Close()
		return nil, err
	}
	// wrap it in a filtering searcher that checks for the polygon inclusivity
	return NewFilteringSearcher(ctx, rectSearcher,
		buildPolygonFilter(ctx, dvReader, field, coordinates)), nil
}
const float64EqualityThreshold = 1e-6
func almostEqual(a, b float64) bool {
return math.Abs(a-b) <= float64EqualityThreshold
}
// buildPolygonFilter returns a FilterFunc that accepts a document match when
// a geo point read from its doc values coincides (within almostEqual) with a
// polygon vertex or lies inside the polygon described by coordinates. The
// inside test is based on the ray-casting technique as referred here:
// https://wrf.ecse.rpi.edu/nikola/pubdetails/pnpoly.html
//
// Only doc-value terms with shift 0 are decoded, and decoding stops after the
// first such term of a document. Bytes read from the doc values are reported
// through reportIOStats and search.RecordSearchCost when a point was found.
// The field argument is not used.
func buildPolygonFilter(ctx context.Context, dvReader index.DocValueReader, field string,
	coordinates []geo.Point) FilterFunc {
	// scratch state shared by every invocation of the returned filter
	var (
		pointLons, pointLats []float64
		sawPoint             bool
	)

	collectPoint := func(_ string, term []byte) {
		if sawPoint {
			// a point was already decoded for this document
			return
		}
		coded := numeric.PrefixCoded(term)
		// full-precision values are the ones stored with shift 0
		if shift, err := coded.Shift(); err != nil || shift != 0 {
			return
		}
		hash, err := coded.Int64()
		if err != nil {
			return
		}
		pointLons = append(pointLons, geo.MortonUnhashLon(uint64(hash)))
		pointLats = append(pointLats, geo.MortonUnhashLat(uint64(hash)))
		sawPoint = true
	}

	// crosses reports whether the ray cast from point crosses the edge a-b;
	// the first operand guarantees a.Lat != b.Lat before the division
	crosses := func(point, a, b geo.Point) bool {
		return (a.Lat > point.Lat) != (b.Lat > point.Lat) &&
			point.Lon < (b.Lon-a.Lon)*(point.Lat-a.Lat)/(b.Lat-a.Lat)+a.Lon
	}

	return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
		pointLons, pointLats = pointLons[:0], pointLats[:0]
		sawPoint = false

		// Note: this approach works for points which are strictly inside
		// the polygon. ie it might fail for certain points on the polygon boundaries.
		if err := dvReader.VisitDocValues(d.IndexInternalID, collectPoint); err != nil || !sawPoint {
			return false
		}

		if n := dvReader.BytesRead(); n > 0 {
			reportIOStats(ctx, n)
			search.RecordSearchCost(ctx, search.AddM, n)
		}

		nVertices := len(coordinates)
		if nVertices < 3 {
			return false
		}

		for k := range pointLons {
			pt := geo.Point{Lon: pointLons[k], Lat: pointLats[k]}
			inside := false
			// walk every edge prev->j, starting with the closing edge
			// from the last vertex back to the first
			prev := nVertices - 1
			for j := 0; j < nVertices; j++ {
				// check for a direct vertex match
				if almostEqual(coordinates[j].Lat, pt.Lat) &&
					almostEqual(coordinates[j].Lon, pt.Lon) {
					return true
				}
				if crosses(pt, coordinates[prev], coordinates[j]) {
					inside = !inside
				}
				prev = j
			}
			if inside {
				return true
			}
		}
		return false
	}
}
================================================
FILE: search/searcher/search_geopolygon_test.go
================================================
// Copyright (c) 2019 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestSimpleGeoPolygons runs polygon searches with small triangles against
// the index built by setupGeoPolygonPoints and compares the matched document
// IDs with the expected ones.
func TestSimpleGeoPolygons(t *testing.T) {
	tests := []struct {
		polygon []geo.Point
		field   string
		want    []string
	}{
		// test points inside a triangle & on vertices
		// r, s - inside and t,u - on vertices.
		{[]geo.Point{{Lon: 1.0, Lat: 1.0}, {Lon: 2.0, Lat: 1.9}, {Lon: 2.0, Lat: 1.0}}, "loc", []string{"r", "s", "t", "u"}},
		// non overlapping polygon for the indexed documents
		{[]geo.Point{{Lon: 3.0, Lat: 1.0}, {Lon: 4.0, Lat: 2.5}, {Lon: 3.0, Lat: 2}}, "loc", nil},
	}
	i := setupGeoPolygonPoints(t)
	indexReader, err := i.Reader()
	if err != nil {
		// nothing below can run without a reader; stop instead of
		// dereferencing a nil one
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	for _, test := range tests {
		got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon)
		}
	}
}
// TestRealGeoPolygons runs polygon searches with real-world coordinates
// (convex, concave and degenerate polygons) against the index built by
// setupGeoPolygonPoints and compares the matched document IDs with the
// expected ones.
func TestRealGeoPolygons(t *testing.T) {
	tests := []struct {
		polygon []geo.Point
		field   string
		want    []string
	}{
		{[]geo.Point{
			{Lon: -80.881, Lat: 35.282},
			{Lon: -80.858, Lat: 35.281},
			{Lon: -80.864, Lat: 35.270},
		}, "loc", []string{"k", "l"}},
		{[]geo.Point{
			{Lon: -82.467, Lat: 36.356},
			{Lon: -78.127, Lat: 36.321},
			{Lon: -80.555, Lat: 32.932},
			{Lon: -84.807, Lat: 33.111},
		}, "loc", []string{"k", "l", "m"}},
		// same polygon vertices
		{[]geo.Point{{Lon: -82.467, Lat: 36.356}, {Lon: -82.467, Lat: 36.356}, {Lon: -82.467, Lat: 36.356}, {Lon: -82.467, Lat: 36.356}}, "loc", nil},
		// non-overlapping polygon
		{[]geo.Point{{Lon: -89.113, Lat: 36.400}, {Lon: -93.947, Lat: 36.471}, {Lon: -93.947, Lat: 34.031}}, "loc", nil},
		// concave polygon with a document `o` residing inside the hands, but outside the polygon
		{[]geo.Point{{Lon: -71.65, Lat: 42.446}, {Lon: -71.649, Lat: 42.428}, {Lon: -71.640, Lat: 42.445}, {Lon: -71.649, Lat: 42.435}}, "loc", nil},
		// V like concave polygon with a document 'p' residing inside the bottom corner
		{[]geo.Point{{Lon: -80.304, Lat: 40.740}, {Lon: -80.038, Lat: 40.239}, {Lon: -79.562, Lat: 40.786}, {Lon: -80.018, Lat: 40.328}}, "loc", []string{"p"}},
		{[]geo.Point{
			{Lon: -111.918, Lat: 33.515},
			{Lon: -111.938, Lat: 33.494},
			{Lon: -111.944, Lat: 33.481},
			{Lon: -111.886, Lat: 33.517},
			{Lon: -111.919, Lat: 33.468},
			{Lon: -111.929, Lat: 33.508},
		}, "loc", []string{"q"}},
		// real points near cb bangalore
		{[]geo.Point{
			{Lat: 12.974872, Lon: 77.607749},
			{Lat: 12.971725, Lon: 77.610110},
			{Lat: 12.972530, Lon: 77.606912},
			{Lat: 12.975112, Lon: 77.603780},
		}, "loc", []string{"amoeba", "communiti"}},
	}
	i := setupGeoPolygonPoints(t)
	indexReader, err := i.Reader()
	if err != nil {
		// nothing below can run without a reader; stop instead of
		// dereferencing a nil one
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	for _, test := range tests {
		got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon)
		}
	}
}
// TestGeoRectanglePolygon runs a polygon search with a closed rectangular
// polygon against the index built by setupGeo and expects every indexed
// document to match.
func TestGeoRectanglePolygon(t *testing.T) {
	tests := []struct {
		polygon []geo.Point
		field   string
		want    []string
	}{
		{
			[]geo.Point{{Lon: 0, Lat: 0}, {Lon: 0, Lat: 50}, {Lon: 50, Lat: 50}, {Lon: 50, Lat: 0}, {Lon: 0, Lat: 0}},
			"loc",
			[]string{"a", "b", "c", "d", "e", "f", "g", "h", "i", "j"},
		},
	}
	i := setupGeo(t)
	indexReader, err := i.Reader()
	if err != nil {
		// nothing below can run without a reader; stop instead of
		// dereferencing a nil one
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	for _, test := range tests {
		got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon)
		}
	}
}
// testGeoPolygonSearch runs a bounded-polygon search over field and returns
// the internal IDs of all matches in the order the searcher produced them.
// The searcher is closed before returning; a close failure is reported when
// no earlier error occurred.
func testGeoPolygonSearch(i index.IndexReader, polygon []geo.Point, field string) (rv []string, err error) {
	gbs, err := NewGeoBoundedPolygonSearcher(context.TODO(), i, polygon, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	defer func() {
		// release the readers held by the searcher
		if cerr := gbs.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()
	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}
	docMatch, err := gbs.Next(ctx)
	for docMatch != nil && err == nil {
		rv = append(rv, string(docMatch.IndexInternalID))
		docMatch, err = gbs.Next(ctx)
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
// setupGeoPolygonPoints opens an upsidedown index on a gtreap store with an
// empty path and indexes a fixed set of documents, each carrying a single
// geo point in the "loc" field. Any failure aborts the test.
func setupGeoPolygonPoints(t *testing.T) index.Index {
	idx, err := upsidedown.NewUpsideDownCouch(
		gtreap.Name,
		map[string]interface{}{
			"path": "",
		},
		index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}

	// Documents are indexed in exactly this order.
	fixtures := []struct {
		id       string
		lon, lat float64
	}{
		{"k", -80.86469327, 35.2782},
		{"l", -80.8713, 35.28138},
		{"m", -84.25, 33.153},
		{"n", -89.992, 35.063},
		{"o", -71.648, 42.437},
		{"p", -80.016, 40.314},
		{"q", -111.919, 33.494},
		{"r", 1.5, 1.1},
		{"s", 2, 1.5},
		{"t", 2.0, 1.9},
		{"u", 2.0, 1.0},
		{"amoeba", 77.60490, 12.97467},
		{"communiti", 77.608237, 12.97237},
	}
	for _, f := range fixtures {
		doc := document.NewDocument(f.id)
		doc.AddField(document.NewGeoPointField("loc", []uint64{}, f.lon, f.lat))
		if err := idx.Update(doc); err != nil {
			t.Fatal(err)
		}
	}

	return idx
}
// geoPoint is a test fixture: a named point that is indexed as a document
// whose ID is title and whose geo point field holds (lon, lat).
type geoPoint struct {
	title    string  // document ID used when the point is indexed
	lon, lat float64 // longitude and latitude of the point
}
// TestComplexGeoPolygons checks which points are matched by complex,
// self-intersecting polygons. Every case builds its own index from the
// case's points and expects the matching document IDs in order.
func TestComplexGeoPolygons(t *testing.T) {
	tests := []struct {
		polygon []geo.Point
		points  []geoPoint
		field   string
		want    []string
	}{
		/*
		    /\      /\
		   /__\____/__\
		       \  /
		        \/
		*/
		// b, c - inside; a and d - on vertices.
		{
			[]geo.Point{
				{Lon: 6.0, Lat: 2.0},
				{Lon: 3.0, Lat: 4.0},
				{Lon: 9.0, Lat: 6.0},
				{Lon: 3.0, Lat: 8.0},
				{Lon: 6.0, Lat: 10.0},
				{Lon: 6.0, Lat: 2.0},
			},
			[]geoPoint{
				{title: "a", lon: 3, lat: 4},
				{title: "b", lon: 7, lat: 6},
				{title: "c", lon: 4, lat: 8.1},
				{title: "d", lon: 6, lat: 10.0},
				{title: "e", lon: 5, lat: 6},
				{title: "f", lon: 7, lat: 5},
			},
			"loc",
			[]string{"a", "b", "c", "d"},
		},
		/*
		    ____
		    \  /
		     \/
		     /\
		    /__\
		*/
		{
			[]geo.Point{
				{Lon: 7.0, Lat: 2.0},
				{Lon: 1.0, Lat: 8.0},
				{Lon: 1.0, Lat: 2.0},
				{Lon: 7.0, Lat: 8.0},
				{Lon: 7.0, Lat: 2.0},
			},
			[]geoPoint{
				{title: "a", lon: 6, lat: 5},
				{title: "b", lon: 5, lat: 5},
				{title: "c", lon: 3, lat: 5.0},
				{title: "d", lon: 2, lat: 4.0},
				{title: "e", lon: 5, lat: 3},
				{title: "f", lon: 4, lat: 4},
			},
			"loc",
			[]string{"a", "b", "c", "d"},
		},
	}

	for _, test := range tests {
		i := setupComplexGeoPolygonPoints(t, test.points)
		indexReader, err := i.Reader()
		if err != nil {
			// Fatal rather than Error: the search and Close below would
			// otherwise run against a nil reader.
			t.Fatal(err)
		}

		got, err := testGeoPolygonSearch(indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected %v, got %v for polygon: %+v", test.want, got, test.polygon)
		}

		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}
}
// setupComplexGeoPolygonPoints opens an upsidedown index on a gtreap store
// with an empty path and indexes one document per entry of points, using the
// point's title as the document ID and storing (lon, lat) in the "loc" geo
// point field. Any failure aborts the test.
func setupComplexGeoPolygonPoints(t *testing.T, points []geoPoint) index.Index {
	idx, err := upsidedown.NewUpsideDownCouch(
		gtreap.Name,
		map[string]interface{}{
			"path": "",
		},
		index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}

	for _, p := range points {
		doc := document.NewDocument(p.title)
		doc.AddField(document.NewGeoPointField("loc", []uint64{}, p.lon, p.lat))
		if err := idx.Update(doc); err != nil {
			t.Fatal(err)
		}
	}

	return idx
}
================================================
FILE: search/searcher/search_geoshape.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"bytes"
"context"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
"github.com/blevesearch/geo/geojson"
"github.com/blevesearch/geo/s2"
)
// NewGeoShapeSearcher returns a searcher over field for documents whose
// stored geo shape satisfies relation with respect to the query shape.
//
// Candidate documents are collected by a multi-term searcher over the query
// tokens produced by the "s2" spatial analyzer plugin (taken from the index
// reader when it exposes one, otherwise the package default). Each candidate
// is then confirmed by a doc-value filter that evaluates relation against the
// shape stored for that document.
//
// An error is returned if the term searcher or the doc-value reader cannot
// be created.
func NewGeoShapeSearcher(ctx context.Context, indexReader index.IndexReader, shape index.GeoJSON,
	relation string, field string, boost float64,
	options search.SearcherOptions,
) (search.Searcher, error) {
	var spatialPlugin index.SpatialAnalyzerPlugin

	// check for the spatial plugin from the index.
	if sr, ok := indexReader.(index.SpatialIndexPlugin); ok {
		// a lookup failure is deliberately ignored: the nil check below
		// falls back to the default plugin.
		spatialPlugin, _ = sr.GetSpatialAnalyzerPlugin("s2")
	}

	if spatialPlugin == nil {
		// fallback to the default spatial plugin(s2).
		spatialPlugin = geo.GetSpatialAnalyzerPlugin("s2")
	}

	// obtain the query tokens.
	terms := spatialPlugin.GetQueryTokens(shape)
	mSearcher, err := NewMultiTermSearcher(ctx, indexReader, terms,
		field, boost, options, false)
	if err != nil {
		return nil, err
	}

	dvReader, err := indexReader.DocValueReader([]string{field})
	if err != nil {
		// release the term searcher built above instead of leaking it;
		// the doc-value error is the one worth reporting.
		_ = mSearcher.Close()
		return nil, err
	}

	return NewFilteringSearcher(ctx, mSearcher, buildRelationFilterOnShapes(ctx, dvReader, field, relation, shape)), nil
}
// buildRelationFilterOnShapes returns a FilterFunc that accepts a document
// match only when one of the shapes found in its doc values satisfies
// relation with respect to the query shape.
//
// A stored shape value is delimited by geo.GlueBytes at both ends and may
// arrive split over several doc-value visitor callbacks; the pieces are
// re-joined with index.DocValueTermSeparator before being handed to
// geojson.FilterGeoShapesOnRelation. The returned filter keeps its scratch
// state in closure variables that are reset on every call.
//
// If ctx carries a search.GeoBufferPoolCallbackFunc under
// search.GeoBufferPoolCallbackKey, the pool it returns is passed to the
// relation check.
func buildRelationFilterOnShapes(ctx context.Context, dvReader index.DocValueReader, field string,
	relation string, shape index.GeoJSON,
) FilterFunc {
	// this is for accumulating the shape's actual complete value
	// spread across multiple docvalue visitor callbacks.
	var dvShapeValue []byte
	var startReading, finishReading, found bool
	var reader *bytes.Reader

	var bufPool *s2.GeoBufferPool
	if bufPoolCallback, ok := ctx.Value(search.GeoBufferPoolCallbackKey).(search.GeoBufferPoolCallbackFunc); ok {
		bufPool = bufPoolCallback()
	}

	dvVisitor := func(_ string, term []byte) {
		if found {
			// avoid redundant work if already found
			return
		}
		tl := len(term)
		// only consider the values which are GlueBytes prefixed or
		// if it had already started reading the shape bytes from previous callbacks.
		if startReading || tl > geo.GlueBytesOffset {
			if !startReading && bytes.Equal(geo.GlueBytes, term[:geo.GlueBytesOffset]) {
				startReading = true

				// The closing glue can only be present in this same chunk
				// when the chunk is long enough to hold both delimiters.
				// Without the length guard an overlapping prefix/suffix
				// match would produce an inverted slice below and panic.
				if tl >= 2*geo.GlueBytesOffset &&
					bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) {
					term = term[:tl-geo.GlueBytesOffset]
					finishReading = true
				}

				dvShapeValue = append(dvShapeValue, term[geo.GlueBytesOffset:]...)
			} else if startReading && !finishReading {
				// ">=" so that a chunk consisting of nothing but the closing
				// glue (the value's last byte was the separator) still
				// terminates the shape.
				if tl >= geo.GlueBytesOffset &&
					bytes.Equal(geo.GlueBytes, term[tl-geo.GlueBytesOffset:]) {
					term = term[:tl-geo.GlueBytesOffset]
					finishReading = true
				}

				// restore the separator byte the value was split on.
				dvShapeValue = append(dvShapeValue, index.DocValueTermSeparator)
				dvShapeValue = append(dvShapeValue, term...)
			}

			// apply the filter once the entire docvalue is finished reading.
			if finishReading {
				v, err := geojson.FilterGeoShapesOnRelation(shape, dvShapeValue, relation, &reader, bufPool)
				if err == nil && v {
					found = true
				}

				// get ready for the next shape value of this document.
				dvShapeValue = dvShapeValue[:0]
				startReading = false
				finishReading = false
			}
		}
	}

	return func(sctx *search.SearchContext, d *search.DocumentMatch) bool {
		// reset state variables for each document
		found = false
		startReading = false
		finishReading = false
		dvShapeValue = dvShapeValue[:0]

		if err := dvReader.VisitDocValues(d.IndexInternalID, dvVisitor); err == nil && found {
			// named bytesRead so the local does not shadow the bytes package.
			bytesRead := dvReader.BytesRead()
			if bytesRead > 0 {
				reportIOStats(ctx, bytesRead)
				search.RecordSearchCost(ctx, search.AddM, bytesRead)
			}
			return true
		}

		return false
	}
}
================================================
FILE: search/searcher/search_geoshape_circle_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoJsonCircleIntersectsQuery runs "intersects" geo shape queries with a
// circle as the query shape against the index built by
// setupGeoJsonShapesIndexForCircleQuery and checks the matched document IDs.
func TestGeoJsonCircleIntersectsQuery(t *testing.T) {
	tests := []struct {
		centrePoint    []float64
		radiusInMeters string
		field          string
		want           []string
	}{
		// test intersecting query circle for polygon1.
		{
			[]float64{77.68115043640137, 12.94663769274367},
			"200m",
			"geometry",
			[]string{"polygon1"},
		},

		// test intersecting query circle for polygon1, circle1 and linestring1.
		{
			[]float64{77.68115043640137, 12.94663769274367},
			"750m",
			"geometry",
			[]string{"polygon1", "circle1", "linestring1"},
		},

		// test intersecting query circle for linestring2.
		{
			[]float64{77.69591331481932, 12.92756503709986},
			"250m",
			"geometry",
			[]string{"linestring2"},
		},

		// test intersecting query circle for circle1.
		{[]float64{77.6767, 12.9422}, "250m", "geometry", []string{"circle1"}},

		// test intersecting query circle for point1, envelope1 and linestring3.
		{
			[]float64{81.243896484375, 26.22444694563432},
			"90000m",
			"geometry",
			[]string{"point1", "envelope1", "linestring3"},
		},

		// test intersecting query circle for envelope1.
		{
			[]float64{79.98458862304688, 25.339061458818374},
			"1250m",
			"geometry",
			[]string{"envelope1"},
		},

		// test intersecting query circle for multipoint1.
		{
			[]float64{81.87346458435059, 25.41505910223247},
			"200m",
			"geometry",
			[]string{"multipoint1"},
		},

		// test intersecting query circle for multilinestring1.
		{
			[]float64{81.8669843673706, 25.512661276952272},
			"90m",
			"geometry",
			[]string{"multilinestring1"},
		},
	}

	i := setupGeoJsonShapesIndexForCircleQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the queries below
		// cannot run against a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeCircleRelationQuery("intersects",
			indexReader, test.centrePoint, test.radiusInMeters, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for circle centre: %+v",
				n, test.want, got, test.centrePoint)
		}
	}
}
// TestGeoJsonCircleWithInQuery runs "within" geo shape queries with a circle
// as the query shape against the index built by
// setupGeoJsonShapesIndexForCircleQuery and checks the matched document IDs.
func TestGeoJsonCircleWithInQuery(t *testing.T) {
	tests := []struct {
		centrePoint    []float64
		radiusInMeters string
		field          string
		want           []string
	}{
		// test query circle containing polygon2 and multilinestring2.
		{
			[]float64{81.85981750488281, 25.546778150624146},
			"3700m",
			"geometry",
			[]string{"polygon2", "multilinestring2"},
		},

		// test query circle containing multilinestring2.
		{
			[]float64{81.85981750488281, 25.546778150624146},
			"3250m",
			"geometry",
			[]string{"multilinestring2"},
		},

		// test query circle containing multipoint1.
		{
			[]float64{81.88599586486816, 25.425756968727935},
			"1650m",
			"geometry",
			[]string{"multipoint1"},
		},

		// test a larger query circle containing envelope2 and circle2.
		{
			[]float64{82.09362030029297, 25.546313513788725},
			"1280m",
			"geometry",
			[]string{"envelope2", "circle2"},
		},

		// test query circle containing envelope2 and circle2.
		{
			[]float64{82.10289001464844, 25.544919592476727},
			"700m",
			"geometry",
			[]string{"envelope2", "circle2"},
		},

		// test query circle containing point1 and linestring3.
		{
			[]float64{81.27685546875, 26.1899475672235},
			"5600m",
			"geometry",
			[]string{"point1", "linestring3"},
		},
	}

	i := setupGeoJsonShapesIndexForCircleQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the queries below
		// cannot run against a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeCircleRelationQuery("within", indexReader, test.centrePoint, test.radiusInMeters, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for circle centre: %+v", n, test.want, got, test.centrePoint)
		}
	}
}
// TestGeoJsonCircleContainsQuery runs "contains" geo shape queries with a
// circle as the query shape against the index built by
// setupGeoJsonShapesIndexForCircleQuery and checks the matched document IDs.
func TestGeoJsonCircleContainsQuery(t *testing.T) {
	tests := []struct {
		centrePoint    []float64
		radiusInMeters string
		field          string
		want           []string
	}{
		// test query circle within polygon3.
		{
			[]float64{8.549551963806152, 47.3759038562437},
			"180m",
			"geometry",
			[]string{"polygon3"},
		},

		// test query circle contained within envelope3.
		{
			[]float64{8.551011085510254, 47.380117626829275},
			"75m",
			"geometry",
			[]string{"envelope3"},
		},

		// test query circle exceeding envelope3 by a few meters.
		{
			[]float64{8.551011085510254, 47.380117626829275},
			"78m",
			"geometry", nil,
		},

		// test query circle contained within circle3.
		{
			[]float64{8.535819053649902, 47.38297989270074},
			"185m",
			"geometry",
			[]string{"circle3"},
		},
	}

	i := setupGeoJsonShapesIndexForCircleQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the queries below
		// cannot run against a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeCircleRelationQuery("contains",
			indexReader, test.centrePoint, test.radiusInMeters, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for circle centre: %+v",
				n, test.want, got, test.centrePoint)
		}
	}
}
// runGeoShapeCircleRelationQuery searches field for shapes standing in
// relation to the circle described by points (its centre) and radius, and
// returns the external IDs of the hits in the order the searcher produced
// them. Any error from building the searcher, iterating it, or resolving an
// external ID is returned with a nil slice.
func runGeoShapeCircleRelationQuery(relation string, i index.IndexReader,
	points []float64, radius string, field string,
) ([]string, error) {
	s := geo.NewGeoCircle(points, radius)

	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}

	var rv []string
	for {
		docMatch, err := gbs.Next(ctx)
		if err != nil {
			return nil, err
		}
		if docMatch == nil {
			// searcher exhausted
			return rv, nil
		}

		// surface ID resolution failures instead of recording an empty ID.
		docID, err := i.ExternalID(docMatch.IndexInternalID)
		if err != nil {
			return nil, err
		}
		rv = append(rv, docID)
	}
}
// setupGeoJsonShapesIndexForCircleQuery opens a scorch index on a gtreap
// store (empty path, "s2" spatial plugin) and indexes a fixed set of geo
// shapes in the "geometry" field, one document per shape, with the document
// ID naming the shape. Any failure aborts the test.
func setupGeoJsonShapesIndexForCircleQuery(t *testing.T) index.Index {
	idx, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}

	// addShape indexes a document holding a coordinate-based shape.
	addShape := func(id, shapeType string, coordinates [][][][]float64) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coordinates, shapeType, document.DefaultGeoShapeIndexingOptions))
		if err := idx.Update(doc); err != nil {
			t.Fatal(err)
		}
	}

	// addCircle indexes a document holding a circle given by centre and radius.
	addCircle := func(id string, centre []float64, radius string) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoCircleFieldWithIndexingOptions("geometry", []uint64{},
			centre, radius,
			document.DefaultGeoShapeIndexingOptions))
		if err := idx.Update(doc); err != nil {
			t.Fatal(err)
		}
	}

	// The documents below are indexed in the order written.

	addShape("polygon1", "polygon", [][][][]float64{{{
		{77.67248153686523, 12.957679089615821},
		{77.67956256866455, 12.948101542434257},
		{77.68908977508545, 12.948896200093982},
		{77.68934726715086, 12.955211547173878},
		{77.68016338348389, 12.954291440344619},
		{77.67248153686523, 12.957679089615821},
	}}})

	addShape("polygon2", "polygon", [][][][]float64{{{
		{81.84951782226561, 25.522692102524033},
		{81.8557834625244, 25.521762640415535},
		{81.86264991760254, 25.521762640415535},
		{81.86676979064941, 25.521607729364224},
		{81.89560890197754, 25.542673796271302},
		{81.88977241516113, 25.543293330460937},
		{81.84951782226561, 25.522692102524033},
	}}})

	addShape("polygon3", "polygon", [][][][]float64{{{
		{8.548071384429932, 47.379216780040124},
		{8.547642230987549, 47.3771680227784},
		{8.545818328857422, 47.37677569847655},
		{8.546290397644043, 47.37417465983494},
		{8.551719188690186, 47.37417465983494},
		{8.553242683410645, 47.37679022905829},
		{8.548071384429932, 47.379216780040124},
	}}})

	addShape("point1", "point", [][][][]float64{{{{81.2439, 26.2244}}}})

	addShape("envelope1", "envelope", [][][][]float64{{{
		{79.9969482421875, 23.895882703682627},
		{80.7220458984375, 25.750424835909385},
	}}})

	addShape("envelope2", "envelope", [][][][]float64{{{
		{82.10409164428711, 25.54360309635522},
		{82.10537910461424, 25.544609829984058},
	}}})

	addShape("envelope3", "envelope", [][][][]float64{{{
		{8.545668125152588, 47.37942019840244},
		{8.552148342132568, 47.383778974713124},
	}}})

	addCircle("circle1", []float64{77.67252445220947, 12.936348678099293}, "900m")

	addCircle("circle2", []float64{82.10289001464844, 25.544919592476727}, "100m")

	addCircle("circle3", []float64{
		8.53363037109375,
		47.38191927423153,
	}, "400m")

	addShape("linestring1", "linestring", [][][][]float64{{{
		{77.68715858459473, 12.944755587650944},
		{77.69213676452637, 12.945090185150542},
	}}})

	addShape("linestring2", "linestring", [][][][]float64{{{
		{77.68913269042969, 12.929614580987227},
		{77.70252227783203, 12.929698235482276},
	}}})

	addShape("linestring3", "linestring", [][][][]float64{{{
		{81.26792907714844, 26.170845301716813},
		{81.30157470703125, 26.18440207077121},
	}}})

	addShape("multilinestring1", "multilinestring", [][][][]float64{{{
		{81.86170578002928, 25.430407918899984},
		{81.86273574829102, 25.421958559611397},
	}, {
		{81.88230514526367, 25.437616536907512},
		{81.90084457397461, 25.431415601111418},
	}, {
		{81.86805725097656, 25.514868905100244},
		{81.86702728271484, 25.502474677473746},
	}}})

	addShape("multilinestring2", "multilinestring", [][][][]float64{{
		{
			{81.84642791748047, 25.561335859046192},
			{81.84230804443358, 25.550495180470026},
		},
		{{81.87423706054688, 25.55142441992021}, {81.88453674316406, 25.555141305670045}},
		{{81.8642807006836, 25.572175556682115}, {81.87458038330078, 25.567839795359724}},
	}})

	addShape("multipoint1", "multipoint", [][][][]float64{{{
		{81.87337875366211, 25.432268248708212},
		{81.87355041503906, 25.416299483230368},
		{81.90118789672852, 25.426067037656946},
	}}})

	// outer ring followed by one inner ring (the hole).
	addShape("polygonWithHole1", "polygon", [][][][]float64{{
		{
			{77.59991168975829, 12.972232910164502},
			{77.6039457321167, 12.97582941279006},
			{77.60424613952637, 12.98168407323241},
			{77.59974002838135, 12.985489528568463},
			{77.59321689605713, 12.979300406693417},
			{77.59991168975829, 12.972232910164502},
		},
		{
			{77.59682178497314, 12.975787593290978},
			{77.60295867919922, 12.975787593290978},
			{77.60295867919922, 12.98143316204164},
			{77.59682178497314, 12.98143316204164},
			{77.59682178497314, 12.975787593290978},
		},
	}})

	return idx
}
================================================
FILE: search/searcher/search_geoshape_envelope_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoJsonEnvelopeWithInQuery runs "within" geo shape queries with an
// envelope as the query shape against the index built by
// setupGeoJsonShapesIndexForEnvelopeQuery and checks the matched document IDs.
func TestGeoJsonEnvelopeWithInQuery(t *testing.T) {
	tests := []struct {
		points [][]float64
		field  string
		want   []string
	}{
		// test within query envelope for point1.
		{
			[][]float64{
				{76.256103515625, 16.76772739719064},
				{76.35772705078125, 16.872890378907783},
			},
			"geometry",
			[]string{"point1"},
		},

		// test within query envelope for multipoint1.
		{
			[][]float64{
				{81.046142578125, 17.156537255486093},
				{81.331787109375, 17.96305758238804},
			},
			"geometry",
			[]string{"multipoint1"},
		},

		// test within query envelope for partial points in a multipoint1.
		{
			[][]float64{
				{81.05987548828125, 17.16178591271515},
				{81.36199951171875, 17.861132899477624},
			},
			"geometry", nil,
		},

		// test within query envelope for polygon2 and point1.
		{
			[][]float64{
				{76.00341796875, 16.573022719182777},
				{76.717529296875, 17.006888277600524},
			},
			"geometry",
			[]string{"polygon2", "point1"},
		},

		// test within query envelope for linestring1.
		{
			[][]float64{
				{76.84112548828125, 16.86500518090961},
				{77.62115478515625, 17.531439701706244},
			},
			"geometry",
			[]string{"linestring1"},
		},

		// test within query envelope for multilinestring1.
		{
			[][]float64{
				{81.683349609375, 17.104042525557904},
				{81.99234008789062, 17.66495983051931},
			},
			"geometry",
			[]string{"multilinestring1"},
		},

		// test within query envelope that is intersecting multilinestring1.
		{
			[][]float64{
				{81.65725708007812, 17.2601707001208},
				{81.95114135742186, 17.66495983051931},
			},
			"geometry", nil,
		},

		// test within query envelope for envelope1 and circle1.
		{
			[][]float64{
				{74.75372314453125, 17.36636733709516},
				{75.509033203125, 18.038809662036805},
			},
			"geometry",
			[]string{"envelope1", "circle1"},
		},

		// test within query envelope for envelope1.
		{
			[][]float64{
				{74.783935546875, 17.38209494787749},
				{75.96221923828125, 17.727758609852284},
			},
			"geometry",
			[]string{"envelope1"},
		},
	}

	i := setupGeoJsonShapesIndexForEnvelopeQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the queries below
		// cannot run against a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeEnvelopeRelationQuery("within",
			indexReader, test.points, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for envelope: %+v",
				n, test.want, got, test.points)
		}
	}
}
// TestGeoJsonEnvelopeIntersectsQuery runs "intersects" geo shape queries with
// an envelope as the query shape against the index built by
// setupGeoJsonShapesIndexForEnvelopeQuery and checks the matched document IDs.
func TestGeoJsonEnvelopeIntersectsQuery(t *testing.T) {
	tests := []struct {
		points [][]float64
		field  string
		want   []string
	}{
		// test intersecting query envelope for partial points in a multipoint1.
		{
			[][]float64{
				{81.00769042968749, 17.80622614478282},
				{81.199951171875, 17.983957957423037},
			},
			"geometry",
			[]string{"multipoint1"},
		},

		// test intersecting query envelope that is intersecting multilinestring1.
		{
			[][]float64{
				{81.65725708007812, 17.2601707001208},
				{81.95114135742186, 17.66495983051931},
			},
			"geometry",
			[]string{"multilinestring1"},
		},

		// test intersecting query envelope for linestring2.
		{
			[][]float64{
				{81.9854736328125, 18.27369419984127},
				{82.14752197265625, 18.633232565431218},
			},
			"geometry",
			[]string{"linestring2"},
		},

		// test intersecting query envelope for circle2.
		{
			[][]float64{
				{82.6336669921875, 17.82714499951342},
				{82.66387939453125, 17.861132899477624},
			},
			"geometry",
			[]string{"circle2"},
		},

		// test intersecting query envelope for polygon3.
		{
			[][]float64{
				{82.92343139648438, 17.739530934289657},
				{82.98797607421874, 17.79184300887134},
			},
			"geometry",
			[]string{"polygon3"},
		},
	}

	i := setupGeoJsonShapesIndexForEnvelopeQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the queries below
		// cannot run against a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeEnvelopeRelationQuery("intersects", indexReader, test.points, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for envelope: %+v", n, test.want, got, test.points)
		}
	}
}
// TestGeoJsonEnvelopeContainsQuery runs "contains" geo shape queries with an
// envelope as the query shape against the index built by
// setupGeoJsonShapesIndexForEnvelopeQuery and checks the matched document IDs.
func TestGeoJsonEnvelopeContainsQuery(t *testing.T) {
	tests := []struct {
		points [][]float64
		field  string
		want   []string
	}{
		// test envelope contained within polygon1.
		{
			[][]float64{
				{8.548285961151123, 47.376092756617446},
				{8.551225662231445, 47.37764752629426},
			},
			"geometry",
			[]string{"polygon1"},
		},

		// test envelope partially contained within polygon1.
		{
			[][]float64{
				{8.549273014068604, 47.376194471922986},
				{8.551654815673828, 47.37827232736301},
			},
			"geometry", nil,
		},

		// test envelope partially contained within polygon1
		// (same envelope as the previous case, run a second time).
		{
			[][]float64{
				{8.549273014068604, 47.376194471922986},
				{8.551654815673828, 47.37827232736301},
			},
			"geometry", nil,
		},

		// test envelope fully contained within circle3.
		{
			[][]float64{
				{8.532772064208984, 47.380379160110856},
				{8.534531593322752, 47.38299442157271},
			},
			"geometry",
			[]string{"circle3"},
		},

		// test envelope partially contained within circle3.
		{
			[][]float64{
				{8.532836437225342, 47.38010309716447},
				{8.538415431976318, 47.383081594720466},
			},
			"geometry", nil,
		},
	}

	i := setupGeoJsonShapesIndexForEnvelopeQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: the deferred Close and the queries below
		// cannot run against a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeEnvelopeRelationQuery("contains",
			indexReader, test.points, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for envelope: %+v",
				n, test.want, got, test.points)
		}
	}
}
// runGeoShapeEnvelopeRelationQuery searches field for shapes standing in
// relation to the envelope described by points, and returns the external IDs
// of the hits in the order the searcher produced them. Any error from
// building the searcher, iterating it, or resolving an external ID is
// returned with a nil slice.
func runGeoShapeEnvelopeRelationQuery(relation string, i index.IndexReader,
	points [][]float64, field string,
) ([]string, error) {
	s := geo.NewGeoEnvelope(points)

	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}

	var rv []string
	for {
		docMatch, err := gbs.Next(ctx)
		if err != nil {
			return nil, err
		}
		if docMatch == nil {
			// searcher exhausted
			return rv, nil
		}

		// surface ID resolution failures instead of recording an empty ID.
		docID, err := i.ExternalID(docMatch.IndexInternalID)
		if err != nil {
			return nil, err
		}
		rv = append(rv, docID)
	}
}
// setupGeoJsonShapesIndexForEnvelopeQuery opens a scorch index configured
// with the "s2" spatial plugin and an empty path, then indexes the fixture
// shapes used by the envelope query tests under the "geometry" field.
// Documents are added in a fixed order: three polygons, one envelope, three
// circles, one point, two linestrings, one multipoint and two
// multilinestrings. Any failure aborts the test via t.Fatal.
func setupGeoJsonShapesIndexForEnvelopeQuery(t *testing.T) index.Index {
	idx, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}

	// store writes one document to the index, failing the test on error.
	store := func(doc *document.Document) {
		if uerr := idx.Update(doc); uerr != nil {
			t.Fatal(uerr)
		}
	}
	// addShape indexes a single GeoJSON shape of the given type under id.
	addShape := func(id, typ string, coords [][][][]float64) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coords, typ, document.DefaultGeoShapeIndexingOptions))
		store(doc)
	}
	// addCircle indexes a circle given by its center and radius string under id.
	addCircle := func(id string, center []float64, radius string) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoCircleFieldWithIndexingOptions("geometry", []uint64{},
			center, radius,
			document.DefaultGeoShapeIndexingOptions))
		store(doc)
	}

	addShape("polygon1", "polygon", [][][][]float64{{{
		{8.548071384429932, 47.379216780040124},
		{8.547642230987549, 47.3771680227784},
		{8.545818328857422, 47.37677569847655},
		{8.546290397644043, 47.37417465983494},
		{8.551719188690186, 47.37417465983494},
		{8.553242683410645, 47.37679022905829},
		{8.548071384429932, 47.379216780040124},
	}}})

	addShape("polygon2", "polygon", [][][][]float64{{{
		{76.70379638671874, 16.828203242420393},
		{76.36322021484375, 16.58881695544584},
		{76.70928955078125, 16.720385051694},
		{76.70379638671874, 16.828203242420393},
	}}})

	addShape("polygon3", "polygon", [][][][]float64{{{
		{82.9522705078125, 17.749994573141873},
		{82.94952392578125, 17.692436998627272},
		{82.87673950195312, 17.64009591883757},
		{82.76412963867188, 17.58643052828743},
		{82.8094482421875, 17.522272941245202},
		{82.99621582031249, 17.64009591883757},
		{82.9522705078125, 17.749994573141873},
	}}})

	addShape("envelope1", "envelope", [][][][]float64{{{
		{74.89654541015625, 17.403062993328923},
		{74.92401123046875, 17.66495983051931},
	}}})

	addCircle("circle1", []float64{75.0531005859375, 17.675427818339383}, "12900m")
	addCircle("circle2", []float64{82.69683837890625, 17.902955242676995}, "6000m")
	addCircle("circle3", []float64{
		8.53363037109375,
		47.38191927423153,
	}, "400m")

	addShape("point1", "point",
		[][][][]float64{{{{76.29730224609375, 16.796653031618053}}}})

	addShape("linestring1", "linestring", [][][][]float64{{{
		{76.85211181640624, 17.51048642597462},
		{77.24212646484374, 16.93070509876554},
	}}})

	addShape("linestring2", "linestring", [][][][]float64{{{
		{81.89208984375, 18.555136195095105},
		{82.21343994140625, 18.059701055000478},
	}}})

	addShape("multipoint1", "multipoint", [][][][]float64{{{
		{81.24938964843749, 17.602139123350838},
		{81.30432128906249, 17.56548361143177},
		{81.29058837890625, 17.180155043474496},
		{81.09283447265625, 17.87681743233167},
	}}})

	addShape("multilinestring1", "multilinestring", [][][][]float64{{
		{
			{81.69708251953125, 17.641404631355755},
			{81.90994262695312, 17.642713334367667},
		},
		{{81.6998291015625, 17.620464090732245}, {81.69708251953125, 17.468572623463153}},
		{{81.70120239257811, 17.458092664041494}, {81.81243896484375, 17.311310073048123}},
		{{81.815185546875, 17.3034434020238}, {81.81243896484375, 17.109292665395643}},
	}})

	addShape("multilinestring2", "multilinestring", [][][][]float64{{
		{
			{77.6015853881836, 12.990089451715061},
			{77.60476112365723, 12.987747683302153},
		},
		{{77.59875297546387, 12.988751301039581}, {77.59446144104004, 12.98197680263484}},
		{{77.60188579559325, 12.982604078764705}, {77.60557651519775, 12.987329508048184}},
	}})

	return idx
}
================================================
FILE: search/searcher/search_geoshape_geometrycollection_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoJSONIntersectsQueryAgainstGeometryCollection runs "intersects"
// geometry collection queries against the fixture index built by
// setupGeoJsonShapesIndexForGeometryCollectionQuery and compares the matched
// document IDs, in searcher order, with the expected list for each case.
func TestGeoJSONIntersectsQueryAgainstGeometryCollection(t *testing.T) {
	tests := []struct {
		points [][][][][]float64 // one coordinate set per shape in the collection
		types  []string          // GeoJSON type of each shape, parallel to points
		field  string
		want   []string
	}{
		// test intersects geometrycollection query for gc_polygon1_linestring1.
		{
			[][][][][]float64{
				{{{
					{-120.80017089843749, 36.54053616262899},
					{-120.67932128906249, 36.33725319397006},
					{-120.30578613281251, 36.90597988519294},
					{-120.80017089843749, 36.54053616262899},
				}}},
				{{{{-118.24584960937499, 35.32184842037683}, {-117.8668212890625, 35.06597313798418}}}},
			},
			[]string{"polygon", "linestring"},
			"geometry",
			[]string{"gc_polygon1_linestring1"},
		},
		// test intersects geometrycollection query for gc_polygon1_linestring1.
		{
			[][][][][]float64{
				{{
					{{-118.3172607421875, 35.250105158539355}, {-117.50976562499999, 35.37561413174875}},
					{{-118.69628906249999, 34.6241677899049}, {-118.3172607421875, 35.03899204678081}},
					{{-117.94921874999999, 35.146862906756304}, {-117.674560546875, 34.41144164327245}},
				}},
				{{{
					{-117.04284667968749, 35.263561862152095},
					{-116.8505859375, 35.263561862152095},
					{-116.8505859375, 35.33529320309328},
					{-117.04284667968749, 35.33529320309328},
					{-117.04284667968749, 35.263561862152095},
				}}},
			},
			[]string{"multilinestring", "polygon"},
			"geometry",
			[]string{"gc_polygon1_linestring1"},
		},
		// test intersects geometrycollection query for gc_multipolygon1_multilinestring1.
		{
			[][][][][]float64{
				{{
					{{-115.8563232421875, 38.53957267203905}, {-115.58166503906251, 38.54816542304656}},
					{{-115.8343505859375, 38.45789034424927}, {-115.81237792968749, 38.19502155795575}},
				}},
				{{{{-116.64905548095702, 37.94920616351679}}}},
			},
			[]string{"multilinestring", "point"},
			"geometry",
			[]string{"gc_multipolygon1_multilinestring1"},
		},
		// test intersects geometrycollection query for gc_polygon1_linestring1 and gc_multipolygon1_multilinestring1.
		{
			[][][][][]float64{
				{{{{-116.64905548095702, 37.94920616351679}, {-118.29528808593751, 34.52466147177172}}}},
				{{
					{{-115.8563232421875, 38.53957267203905}, {-115.58166503906251, 38.54816542304656}},
					{{-115.8343505859375, 38.45789034424927}, {-115.81237792968749, 38.19502155795575}},
				}},
			},
			[]string{"multipoint", "multilinestring"},
			"geometry",
			[]string{
				"gc_polygon1_linestring1",
				"gc_multipolygon1_multilinestring1",
			},
		},
		// test intersects geometrycollection query for gc_point1_multipoint1:
		// the first rectangle of the query multipolygon encloses the indexed
		// multipoint vertex at (-117.136, 36.474).
		{
			[][][][][]float64{
				{{{
					{-117.46582031249999, 36.146746777814364},
					{-116.70227050781249, 36.146746777814364},
					{-116.70227050781249, 36.69485094156225},
					{-117.46582031249999, 36.69485094156225},
					{-117.46582031249999, 36.146746777814364},
				}}, {{
					{-115.5267333984375, 38.06106741381201},
					{-115.4937744140625, 37.18220222107978},
					{-114.93896484374999, 37.304644804751106},
					{-115.5267333984375, 38.06106741381201},
				}}},
				{{
					{{-115.8563232421875, 38.53957267203905}, {-115.58166503906251, 38.54816542304656}},
					{{-115.8343505859375, 38.45789034424927}, {-115.81237792968749, 38.19502155795575}},
				}},
			},
			[]string{"multipolygon", "multilinestring"},
			"geometry",
			[]string{"gc_point1_multipoint1"},
		},
	}

	i := setupGeoJsonShapesIndexForGeometryCollectionQuery(t)
	// Registered first so it runs last, after the reader has been closed.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use and later close a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeGeometryCollectionRelationQuery("intersects",
			indexReader, test.points, test.types, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for geometry collection: %+v",
				n, test.want, got, test.points)
		}
	}
}
// TestGeoJSONWithInQueryAgainstGeometryCollection runs "within" geometry
// collection queries against the fixture index built by
// setupGeoJsonShapesIndexForGeometryCollectionQuery and compares the matched
// document IDs, in searcher order, with the expected list for each case.
func TestGeoJSONWithInQueryAgainstGeometryCollection(t *testing.T) {
	tests := []struct {
		points [][][][][]float64 // one coordinate set per shape in the collection
		types  []string          // GeoJSON type of each shape, parallel to points
		field  string
		want   []string
	}{
		// test within geometrycollection query for gc_multipoint2_multipolygon2_multiline2.
		{
			[][][][][]float64{
				{{{{-122.40434646606444, 37.73400071182758}, {-122.39730834960938, 37.73691949864062}}}},
				{{{
					{-122.42511749267578, 37.760808496517235},
					{-122.42314338684082, 37.74248523826606},
					{-122.40082740783691, 37.756669194195815},
					{-122.42511749267578, 37.760808496517235},
				}}},
				{
					{{
						{-122.46339797973633, 37.76637243960179},
						{-122.46176719665527, 37.7502901437285},
						{-122.43644714355469, 37.75911208915015},
						{-122.46339797973633, 37.76637243960179},
					}},
					{{
						{-122.43653297424315, 37.714720253587004},
						{-122.40563392639159, 37.714720253587004},
						{-122.40563392639159, 37.72904529863455},
						{-122.43653297424315, 37.72904529863455},
						{-122.43653297424315, 37.714720253587004},
					}},
				},
			},
			[]string{"linestring", "polygon", "multipolygon"},
			"geometry",
			[]string{"gc_multipoint2_multipolygon2_multiline2"},
		},
		// test within geometrycollection query.
		{
			[][][][][]float64{
				{{{{-122.40434646606444, 37.73400071182758}, {-122.39730834960938, 37.73691949864062}}}},
				{
					{{
						{-122.46339797973633, 37.76637243960179},
						{-122.46176719665527, 37.7502901437285},
						{-122.43644714355469, 37.75911208915015},
						{-122.46339797973633, 37.76637243960179},
					}},
					{{
						{-122.43653297424315, 37.714720253587004},
						{-122.40563392639159, 37.714720253587004},
						{-122.40563392639159, 37.72904529863455},
						{-122.43653297424315, 37.72904529863455},
						{-122.43653297424315, 37.714720253587004},
					}},
				},
			},
			[]string{"linestring", "multipolygon"},
			"geometry", nil,
		},
		// test within geometrycollection for gc_multipoint2_multipolygon2_multiline2.
		{
			[][][][][]float64{
				{{{
					{-122.4491500854492, 37.78170504295941},
					{-122.4862289428711, 37.747371884118664},
					{-122.43078231811525, 37.6949593672454},
					{-122.3799705505371, 37.72945260537779},
					{-122.3928451538086, 37.78007695280165},
					{-122.4491500854492, 37.78170504295941},
				}}},
			},
			[]string{"polygon"},
			"geometry",
			[]string{"gc_multipoint2_multipolygon2_multiline2"},
		},
		// test within geometrycollection for gc_multipolygon3
		// gc_multipolygon3's multipolygons within the geometrycollection is covered by the
		// query's geometric collection of a polygon and a multipolygon.
		{
			[][][][][]float64{
				{{{
					{86.6162109375, 57.26716357153586},
					// Latitude must lie in [-90, 90]. This north-west vertex
					// keeps the ring's top edge above gc_multipolygon3's
					// northernmost indexed vertex (85.605, 57.208).
					// NOTE(review): value chosen geometrically; confirm
					// against the fixture's upstream source.
					{85.1220703125, 57.3},
					{84.462890625, 56.27996083172844},
					{86.98974609375, 55.70235509327093},
					{87.802734375, 56.77680831656842},
					{86.6162109375, 57.26716357153586},
				}}},
				{
					{{
						{75.1025390625, 54.3549556895541},
						{73.1689453125, 54.29088164657006},
						{72.7294921875, 53.08082737207479},
						{74.091796875, 51.998410382390325},
						{76.79443359375, 53.396432127095984},
						{75.1025390625, 54.3549556895541},
					}},
					{{
						{80.1123046875, 55.57834467218206},
						{78.9697265625, 55.65279803318956},
						{78.5302734375, 54.635697306063854},
						{79.87060546875, 54.18815548107151},
						{80.96923828125, 54.80068486732233},
						{80.1123046875, 55.57834467218206},
					}},
				},
			},
			[]string{"polygon", "multipolygon"},
			"geometry",
			[]string{"gc_multipolygon3"},
		},
	}

	i := setupGeoJsonShapesIndexForGeometryCollectionQuery(t)
	// Registered first so it runs last, after the reader has been closed.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use and later close a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeGeometryCollectionRelationQuery("within", indexReader, test.points, test.types, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for geometry collection: %+v", n, test.want, got, test.points)
		}
	}
}
// TestGeoJSONContainsQueryAgainstGeometryCollection runs "contains" geometry
// collection queries against the fixture index built by
// setupGeoJsonShapesIndexForGeometryCollectionQuery and compares the matched
// document IDs, in searcher order, with the expected list for each case.
func TestGeoJSONContainsQueryAgainstGeometryCollection(t *testing.T) {
	tests := []struct {
		points [][][][][]float64 // one coordinate set per shape in the collection
		types  []string          // GeoJSON type of each shape, parallel to points
		field  string
		want   []string
	}{
		// test contains for a geometrycollection that comprises of a linestring,
		// polygon, multipolygon, point and multipoint. The query multipolygon's
		// third polygon lies inside the second polygon of multipolygon4, so only
		// multipolygon4 is expected to contain the whole collection.
		{
			[][][][][]float64{
				// linestring
				{{{{7.457013130187988, 46.966401589723894}, {7.482891082763671, 46.94554547022893}}}},
				// polygon
				{{{
					{7.466454505920409, 46.965054389418476},
					{7.46143341064453, 46.9641171865865},
					{7.466325759887694, 46.96101258493027},
					{7.466454505920409, 46.965054389418476},
				}}},
				// multipolygon
				{
					{{
						{7.4811744689941415, 46.957966385567474},
						{7.478899955749511, 46.95492001277476},
						{7.484478950500488, 46.95509576976545},
						{7.4811744689941415, 46.957966385567474},
					}},
					{{
						{7.466540336608888, 46.94753769790697},
						{7.464609146118165, 46.946219320241674},
						{7.468342781066894, 46.94592634301753},
						{7.466540336608888, 46.94753769790697},
					}},
					{{
						{7.504348754882812, 47.00425575323296},
						{7.501087188720703, 47.001680295206874},
						{7.507266998291015, 47.00191443288521},
						{7.504348754882812, 47.00425575323296},
					}},
				},
				// point
				{{{{7.449932098388673, 46.95817142366062}}}},
				// multipoint
				{{{{7.479157447814942, 46.96370715518446}, {7.4532365798950195, 46.96657730900153}}}},
			},
			[]string{"linestring", "polygon", "multipolygon", "point", "multipoint"},
			"geometry",
			[]string{"multipolygon4"},
		},
		// test contains for a geometrycollection query with one point inside the multipoint lying outside
		// polygon2.
		{
			[][][][][]float64{
				// linestring
				{{{{7.457013130187988, 46.966401589723894}, {7.482891082763671, 46.94554547022893}}}},
				// polygon
				{{{
					{7.466454505920409, 46.965054389418476},
					{7.46143341064453, 46.9641171865865},
					{7.466325759887694, 46.96101258493027},
					{7.466454505920409, 46.965054389418476},
				}}},
				// multipolygon
				{
					{{
						{7.4811744689941415, 46.957966385567474},
						{7.478899955749511, 46.95492001277476},
						{7.484478950500488, 46.95509576976545},
						{7.4811744689941415, 46.957966385567474},
					}},
					{{
						{7.466540336608888, 46.94753769790697},
						{7.464609146118165, 46.946219320241674},
						{7.468342781066894, 46.94592634301753},
						{7.466540336608888, 46.94753769790697},
					}},
				},
				// point
				{{{{7.449932098388673, 46.95817142366062}}}},
				// multipoint
				{{{{7.479157447814942, 46.96370715518446}, {7.475638389587402, 46.965200825877794}}}},
			},
			[]string{"linestring", "polygon", "multipolygon", "point", "multipoint"},
			"geometry",
			nil,
		},
		// test contains for a geometrycollection query whose shapes are all
		// expected to be contained by the ring shared by polygon2 and the first
		// polygon of multipolygon4, so both documents should match.
		{
			[][][][][]float64{
				// linestring
				{{{{7.457013130187988, 46.966401589723894}, {7.482891082763671, 46.94554547022893}}}},
				// polygon
				{{{
					{7.466454505920409, 46.965054389418476},
					{7.46143341064453, 46.9641171865865},
					{7.466325759887694, 46.96101258493027},
					{7.466454505920409, 46.965054389418476},
				}}},
				// multipolygon
				{
					{{
						{7.4811744689941415, 46.957966385567474},
						{7.478899955749511, 46.95492001277476},
						{7.484478950500488, 46.95509576976545},
						{7.4811744689941415, 46.957966385567474},
					}},
					{{
						{7.466540336608888, 46.94753769790697},
						{7.464609146118165, 46.946219320241674},
						{7.468342781066894, 46.94592634301753},
						{7.466540336608888, 46.94753769790697},
					}},
				},
				// point
				{{{{7.449932098388673, 46.95817142366062}}}},
				// multipoint
				{{{{7.479157447814942, 46.96370715518446}, {7.4532365798950195, 46.96657730900153}}}},
			},
			[]string{"linestring", "polygon", "multipolygon", "point", "multipoint"},
			"geometry",
			[]string{"polygon2", "multipolygon4"},
		},
	}

	i := setupGeoJsonShapesIndexForGeometryCollectionQuery(t)
	// Registered first so it runs last, after the reader has been closed.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use and later close a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeGeometryCollectionRelationQuery("contains",
			indexReader, test.points, test.types, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for geometry collection: %+v",
				n, test.want, got, test.points)
		}
	}
}
// runGeoShapeGeometryCollectionRelationQuery builds a geometry collection
// from points and the parallel types slice, runs a geoshape search with the
// given relation against field using reader i, and returns the external IDs
// of the matching documents in the order the searcher yields them. The
// returned slice is nil when nothing matches.
//
// Any error from building the collection, creating the searcher, iterating
// it, or resolving an external ID is returned to the caller.
func runGeoShapeGeometryCollectionRelationQuery(relation string, i index.IndexReader,
	points [][][][][]float64, types []string, field string,
) ([]string, error) {
	var rv []string
	s, _, err := geo.NewGeometryCollection(points, types)
	if err != nil {
		return nil, err
	}
	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}
	docMatch, err := gbs.Next(ctx)
	for docMatch != nil && err == nil {
		// Surface lookup failures instead of silently recording an empty ID,
		// which would otherwise show up as a confusing result mismatch.
		docID, idErr := i.ExternalID(docMatch.IndexInternalID)
		if idErr != nil {
			return nil, idErr
		}
		rv = append(rv, docID)
		docMatch, err = gbs.Next(ctx)
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
// setupGeoJsonShapesIndexForGeometryCollectionQuery opens a scorch index
// configured with the "s2" spatial plugin and an empty path, then indexes the
// fixtures used by the geometry collection query tests under the "geometry"
// field: five geometry collection documents followed by a plain polygon and a
// plain multipolygon. Any failure aborts the test via t.Fatal.
func setupGeoJsonShapesIndexForGeometryCollectionQuery(t *testing.T) index.Index {
	idx, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		index.NewAnalysisQueue(1))
	if err != nil {
		t.Fatal(err)
	}
	if err = idx.Open(); err != nil {
		t.Fatal(err)
	}

	// store writes one document to the index, failing the test on error.
	store := func(doc *document.Document) {
		if uerr := idx.Update(doc); uerr != nil {
			t.Fatal(uerr)
		}
	}
	// addCollection indexes a geometry collection made of shapes whose
	// GeoJSON types are given, position by position, in typs.
	addCollection := func(id string, shapes [][][][][]float64, typs []string) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeometryCollectionFieldWithIndexingOptions("geometry",
			[]uint64{}, shapes, typs, document.DefaultGeoShapeIndexingOptions))
		store(doc)
	}
	// addShape indexes a single GeoJSON shape of the given type under id.
	addShape := func(id, typ string, coords [][][][]float64) {
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coords, typ, document.DefaultGeoShapeIndexingOptions))
		store(doc)
	}

	// document gc_polygon1_linestring1
	addCollection("gc_polygon1_linestring1",
		[][][][][]float64{
			// polygon1
			{{{
				{-118.15246582031249, 34.876918445772084},
				{-118.46557617187499, 34.773203753940734},
				{-118.3172607421875, 34.50655662164561},
				{-117.91625976562499, 34.4793919710481},
				{-117.76245117187499, 34.76417891445512},
				{-118.15246582031249, 34.876918445772084},
			}}},
			// linestring1
			{{{
				{-120.78918457031251, 36.87522650673951},
				{-118.9215087890625, 34.95349314197422},
			}}},
		},
		[]string{"polygon", "linestring"})

	// document gc_multipolygon1_multilinestring1
	addCollection("gc_multipolygon1_multilinestring1",
		[][][][][]float64{
			// multipolygon1
			{
				{{
					{-117.24609374999999, 37.67512527892127},
					{-117.61962890624999, 37.26530995561875},
					{-116.597900390625, 37.56199695314352},
					{-117.24609374999999, 37.67512527892127},
				}},
				{{
					{-117.60864257812501, 38.71123253895224},
					{-117.41638183593749, 38.36750215395045},
					{-117.66357421875, 37.93986540897977},
					{-116.6473388671875, 37.94852933714952},
					{-117.1307373046875, 38.363195134453846},
					{-116.75170898437501, 38.7283759182398},
					{-117.60864257812501, 38.71123253895224},
				}},
			},
			// multilinestring1
			{{
				{{-118.9215087890625, 38.74123075381228}, {-118.78967285156249, 38.43207668538207}},
				{{-118.57543945312501, 38.8225909761771}, {-118.45458984375, 38.522384090200845}},
				{{-118.94897460937499, 38.788345355085625}, {-118.61938476562499, 38.86965182408357}},
			}},
		},
		[]string{"multipolygon", "multilinestring"})

	// document gc_point1_multipoint1
	addCollection("gc_point1_multipoint1",
		[][][][][]float64{
			// point1
			{{{{-115.10925292968749, 36.20882309283712}}}},
			// multipoint1
			{{{
				{-117.13623046874999, 36.474306755095235},
				{-118.57543945312501, 36.518465989675875},
				{-118.58642578124999, 36.90597988519294},
				{-119.5477294921875, 37.85316995894978},
			}}},
		},
		[]string{"point", "multipoint"})

	// document gc_multipoint2_multipolygon2_multiline2
	// NOTE(review): the third type is spelled "multiline" here while the rest
	// of this file uses "multilinestring"; the "within" expectations may rely
	// on it, so confirm before changing.
	addCollection("gc_multipoint2_multipolygon2_multiline2",
		[][][][][]float64{
			// multipoint2
			{{{
				{-122.4052906036377, 37.75626203719391},
				{-122.42091178894044, 37.74757548736071},
			}}},
			// multipolygon2
			{
				{{
					{-122.46168136596681, 37.765151122096945},
					{-122.46168136596681, 37.754972691904946},
					{-122.45103836059569, 37.754972691904946},
					{-122.451810836792, 37.7624370109886},
					{-122.46168136596681, 37.765151122096945},
				}},
				{{
					{-122.41902351379395, 37.726194088705576},
					{-122.43533134460448, 37.71668926284967},
					{-122.40777969360353, 37.71634978222733},
					{-122.41902351379395, 37.726194088705576},
				}},
			},
			// multilinestring2
			{{
				{{-122.41284370422362, 37.73155698786267}, {-122.40700721740721, 37.73338978839743}},
				{{-122.40434646606444, 37.73400071182758}, {-122.39730834960938, 37.73691949864062}},
			}},
		},
		[]string{"multipoint", "multipolygon", "multiline"})

	// document gc_multipolygon3
	addCollection("gc_multipolygon3",
		[][][][][]float64{
			// multipolygon3
			{
				{{
					{85.60546875, 57.20771009775018},
					{86.396484375, 55.99838095535963},
					{87.03369140625, 56.71656572651468},
					{85.60546875, 57.20771009775018},
				}},
				{{
					{79.56298828125, 55.3915921070334},
					{79.60693359375, 54.43171285946844},
					{80.39794921875, 54.85131525968606},
					{79.56298828125, 55.3915921070334},
				}},
				{{
					{74.35546875, 54.13669645687002},
					{74.1796875, 52.802761415419674},
					{75.87158203125, 53.44880683542759},
					{74.35546875, 54.13669645687002},
				}},
			},
		},
		[]string{"multipolygon"})

	// plain (non-collection) polygon document
	addShape("polygon2", "polygon", [][][][]float64{{{
		{7.452635765075683, 46.96692874582506},
		{7.449803352355956, 46.95817142366062},
		{7.4573564529418945, 46.95149263607834},
		{7.462162971496582, 46.945955640812095},
		{7.483148574829102, 46.945311085627445},
		{7.487225532531738, 46.957029058564686},
		{7.4793291091918945, 46.96388288331302},
		{7.464480400085448, 46.96903731827891},
		{7.452635765075683, 46.96692874582506},
	}}})

	// plain multipolygon document; its first polygon repeats polygon2's ring
	addShape("multipolygon4", "multipolygon", [][][][]float64{
		{{
			{7.452635765075683, 46.96692874582506},
			{7.449803352355956, 46.95817142366062},
			{7.4573564529418945, 46.95149263607834},
			{7.462162971496582, 46.945955640812095},
			{7.483148574829102, 46.945311085627445},
			{7.487225532531738, 46.957029058564686},
			{7.4793291091918945, 46.96388288331302},
			{7.464480400085448, 46.96903731827891},
			{7.452635765075683, 46.96692874582506},
		}},
		{{
			{7.4478721618652335, 47.00015837528636},
			{7.5110435485839835, 47.00015837528636},
			{7.5110435485839835, 47.00683108710118},
			{7.4478721618652335, 47.00683108710118},
			{7.4478721618652335, 47.00015837528636},
		}},
	})

	return idx
}
================================================
FILE: search/searcher/search_geoshape_linestring_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoJsonLinestringIntersectsQuery runs "intersects" linestring queries
// against the fixture index built by setupGeoJsonShapesIndexForLinestringQuery
// and compares the matched document IDs, in searcher order, with the expected
// list for each case.
func TestGeoJsonLinestringIntersectsQuery(t *testing.T) {
	tests := []struct {
		line  [][]float64 // vertices of the query linestring
		field string
		want  []string
	}{
		// test intersecting linestring query for polygon1.
		{
			[][]float64{
				{74.85860824584961, 22.407219759334023},
				{74.8663330078125, 22.382936446589863},
			},
			"geometry",
			[]string{"polygon1"},
		},
		// test intersecting linestring query for polygon1 and polygon2.
		{
			[][]float64{
				{74.82461929321289, 22.393729553598526},
				{74.93671417236328, 22.356743809494784},
			},
			"geometry",
			[]string{"polygon1", "polygon2"},
		},
		// test intersecting linestring query for envelope1.
		{
			[][]float64{
				{74.83938217163086, 22.325782524687973},
				{74.8692512512207, 22.311172762889516},
			},
			"geometry",
			[]string{"envelope1"},
		},
		// test intersecting linestring query for circle.
		{
			[][]float64{
				{74.94546890258789, 22.310815439776572},
				{74.93276596069336, 22.303708490145645},
			},
			"geometry",
			[]string{"circle1"},
		},
		// test intersecting linestring query for linestring1.
		{
			[][]float64{
				{74.938645362854, 22.321614134448936},
				{74.94070529937744, 22.320224643365446},
			},
			"geometry",
			[]string{"linestring1"},
		},
		// test intersecting linestring query for multilinestring1.
		{
			[][]float64{
				{74.9241828918457, 22.307996525380194},
				{74.94100570678711, 22.293781977618558},
			},
			"geometry",
			[]string{"multilinestring1"},
		},
		// test intersecting linestring query for multipolygon1.
		{
			[][]float64{
				{36.22072219848633, 50.007132228568786},
				{36.22218132019043, 49.99791917183082},
			},
			"geometry",
			[]string{"multipolygon1"},
		},
		// test intersecting linestring query for envelope2, circle2,
		// multipolygon1 and gc_polygonInGc_multipolygonInGc.
		{
			[][]float64{
				{36.19840621948242, 50.03834418692451},
				{36.25720024108887, 50.02136210283289},
			},
			"geometry",
			[]string{"envelope2", "circle2", "multipolygon1", "gc_polygonInGc_multipolygonInGc"},
		},
	}

	i := setupGeoJsonShapesIndexForLinestringQuery(t)
	// Registered first so it runs last, after the reader has been closed.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use and later close a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeLinestringQueryWithRelation("intersects",
			indexReader, test.line, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for linestring: %+v",
				n, test.want, got, test.line)
		}
	}
}
// TestGeoJsonLinestringContainsQuery runs "contains" linestring queries
// against the fixture index built by setupGeoJsonShapesIndexForLinestringQuery
// and compares the matched document IDs, in searcher order, with the expected
// list for each case.
func TestGeoJsonLinestringContainsQuery(t *testing.T) {
	tests := []struct {
		line  [][]float64 // vertices of the query linestring
		field string
		want  []string
	}{
		// test a linestring query for multipolygon1.
		{
			[][]float64{
				{36.21668815612793, 50.040494087443996},
				{36.226301193237305, 50.03861982057644},
			},
			"geometry",
			[]string{"multipolygon1"},
		},
		// test a linestring query with endpoints on two
		// different polygons in a multipolygon.
		{
			[][]float64{
				{36.19746208190918, 50.038564693972646},
				{36.21565818786621, 50.03718650830641},
			},
			"geometry", nil,
		},
		// test a linestring query for envelope2.
		{
			[][]float64{
				{36.25290870666503, 50.03018471417061},
				{36.23110771179199, 50.01854955486945},
			},
			"geometry",
			[]string{"envelope2"},
		},
		// test a linestring query for circle2.
		{
			[][]float64{
				{36.220550537109375, 50.02930252595981},
				{36.224327087402344, 50.02847545979485},
			},
			"geometry",
			[]string{"circle2"},
		},
		// test a linestring query for polygonWithHole2.
		{
			[][]float64{
				{36.27367973327637, 49.89883638369706},
				{36.27445220947265, 49.89596137883285},
			},
			"geometry",
			[]string{"polygonWithHole2"},
		},
		// test a linestring query within the hole of polygonWithHole2.
		{[][]float64{
			{36.261234283447266, 49.89540847364305},
			{36.26243591308594, 49.89087441212101},
		}, "geometry", nil},
	}

	i := setupGeoJsonShapesIndexForLinestringQuery(t)
	// Registered first so it runs last, after the reader has been closed.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use and later close a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeLinestringQueryWithRelation("contains",
			indexReader, test.line, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for linestring: %+v",
				n, test.want, got, test.line)
		}
	}
}
// TestGeoJsonMultiLinestringContainsQuery runs "contains" multilinestring
// queries against the fixture index built by
// setupGeoJsonShapesIndexForLinestringQuery and compares the matched document
// IDs, in searcher order, with the expected list for each case.
func TestGeoJsonMultiLinestringContainsQuery(t *testing.T) {
	tests := []struct {
		line  [][][]float64 // one vertex list per linestring in the query
		field string
		want  []string
	}{
		// test a multilinestring query for multipolygon1.
		{
			[][][]float64{
				{
					{36.21668815612793, 50.040494087443996},
					{36.226301193237305, 50.03861982057644},
				},
				{
					{36.226816177368164, 49.999463999158},
					{36.234025955200195, 50.00271900853649},
				},
			},
			"geometry",
			[]string{"multipolygon1"},
		},
		// test a multilinestring query that is covered by the geometryCollection.
		{
			[][][]float64{{
				{36.28664016723633, 49.96574238290487},
				{36.30251884460449, 49.96369956194569},
			}, {
				{36.19179725646973, 50.03983258984584},
				{36.19420051574707, 50.03801342445342},
			}},
			"geometry",
			[]string{"gc_polygonInGc_multipolygonInGc"},
		},
		// test a multilinestring query for envelope2.
		{
			[][][]float64{
				{
					{36.23213768005371, 50.02913711386621},
					{36.25187873840332, 50.02902683882067},
				},
				{
					{36.231794357299805, 50.018935600613254},
					{36.2314510345459, 50.025883893582055},
				},
			},
			"geometry",
			[]string{"envelope2"},
		},
		// test a multilinestring query with one linestring outside of envelope2.
		{
			[][][]float64{
				{
					{36.23213768005371, 50.02913711386621},
					{36.25187873840332, 50.02902683882067},
				},
				{{36.231794357299805, 50.018935600613254}, {36.2314510345459, 50.025883893582055}},
				{{36.25659942626953, 50.024284772330844}, {36.24406814575195, 50.01518531066489}},
			},
			"geometry", nil,
		},
		// test a multilinestring query with one linestring
		// inside the hole of polygonWithHole2.
		{
			[][][]float64{
				{
					{36.27367973327637, 49.89883638369706},
					{36.27445220947265, 49.89596137883285},
				},
				{{36.261234283447266, 49.89540847364305}, {36.26243591308594, 49.89087441212101}},
			},
			"geometry", nil,
		},
		// test a multilinestring query for polygonWithHole2.
		{
			[][][]float64{
				{
					{36.27367973327637, 49.89883638369706},
					{36.27445220947265, 49.89596137883285},
				},
				{{36.279258728027344, 49.894302644257856}, {36.28166198730469, 49.887335336408235}},
			},
			"geometry",
			[]string{"polygonWithHole2"},
		},
		// test a multilinestring query for polygonWithHole2 with the last line crossing the hole.
		{
			[][][]float64{
				{
					{36.27367973327637, 49.89883638369706},
					{36.27445220947265, 49.89596137883285},
				},
				{{36.279258728027344, 49.894302644257856}, {36.28166198730469, 49.887335336408235}},
				{{36.254024505615234, 49.89839408640621}, {36.27016067504883, 49.90038439228633}},
			},
			"geometry", nil,
		},
	}

	i := setupGeoJsonShapesIndexForLinestringQuery(t)
	// Registered first so it runs last, after the reader has been closed.
	defer func() {
		if cerr := i.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal, not Error: continuing would use and later close a nil reader.
		t.Fatal(err)
	}
	defer func() {
		err = indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeMultiLinestringQueryWithRelation("contains",
			indexReader, test.line, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for multilinestring: %+v",
				n, test.want, got, test.line)
		}
	}
}
// runGeoShapeMultiLinestringQueryWithRelation wraps points in a GeoJSON
// multilinestring and hands it to executeSearch, returning whatever
// executeSearch returns for the given relation and field.
func runGeoShapeMultiLinestringQueryWithRelation(relation string, i index.IndexReader,
	points [][][]float64, field string,
) ([]string, error) {
	return executeSearch(relation, i, geo.NewGeoJsonMultilinestring(points), field)
}
// runGeoShapeLinestringQueryWithRelation wraps points in a GeoJSON linestring
// and hands it to executeSearch, returning whatever executeSearch returns for
// the given relation and field.
func runGeoShapeLinestringQueryWithRelation(relation string, i index.IndexReader,
	points [][]float64, field string,
) ([]string, error) {
	return executeSearch(relation, i, geo.NewGeoJsonLinestring(points), field)
}
// executeSearch builds a geoshape searcher for query shape s with the given
// relation on field, drains it, and returns the external IDs of the matched
// documents in the order the searcher produced them.
//
// The result is nil when nothing matched. Any error from constructing the
// searcher, advancing it, resolving an external ID, or closing the searcher
// is returned with a nil result.
func executeSearch(relation string, i index.IndexReader,
	s index.GeoJSON, field string,
) (rv []string, err error) {
	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	// Release the searcher when done; surface a close failure only if
	// nothing else has already failed.
	defer func() {
		if cerr := gbs.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}

	docMatch, err := gbs.Next(ctx)
	for docMatch != nil && err == nil {
		var docID string
		docID, err = i.ExternalID(docMatch.IndexInternalID)
		if err != nil {
			// Do not record an ID that could not be resolved.
			return nil, err
		}
		rv = append(rv, docID)
		docMatch, err = gbs.Next(ctx)
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
// setupGeoJsonShapesIndexForLinestringQuery creates and opens a scorch index
// (empty "path", "s2" spatial plugin) and indexes a fixed set of geo shapes
// under the "geometry" field, one document per shape. Any failure aborts the
// calling test via t.Fatal. The opened index is returned; the caller owns it.
//
// Documents are indexed in a fixed order; keep it stable, since the tests
// compare result slices with reflect.DeepEqual.
func setupGeoJsonShapesIndexForLinestringQuery(t *testing.T) index.Index {
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = i.Open()
	if err != nil {
		t.Fatal(err)
	}

	// indexDoc writes doc to the index and aborts the test on failure.
	indexDoc := func(doc *document.Document) {
		t.Helper()
		if err := i.Update(doc); err != nil {
			t.Fatal(err)
		}
	}
	// indexShape indexes a document named id holding one geo shape field.
	indexShape := func(id, shapeType string, coords [][][][]float64) {
		t.Helper()
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry",
			[]uint64{}, coords, shapeType,
			document.DefaultGeoShapeIndexingOptions))
		indexDoc(doc)
	}
	// indexCircle indexes a document named id holding one geo circle field.
	indexCircle := func(id string, center []float64, radius string) {
		t.Helper()
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoCircleFieldWithIndexingOptions("geometry",
			[]uint64{}, center, radius,
			document.DefaultGeoShapeIndexingOptions))
		indexDoc(doc)
	}

	indexShape("polygon1", "polygon", [][][][]float64{{{
		{74.84642028808594, 22.402776071459712},
		{74.83234405517578, 22.39039647758608},
		{74.86719131469727, 22.38801566009795},
		{74.85139846801758, 22.39103135536648},
		{74.86461639404297, 22.394840561182853},
		{74.8495101928711, 22.397697397065034},
		{74.86186981201172, 22.401982540816856},
		{74.84642028808594, 22.402776071459712},
	}}})

	indexShape("polygon2", "polygon", [][][][]float64{{{
		{74.93431091308592, 22.376428433285266},
		{74.92898941040039, 22.39103135536648},
		{74.9241828918457, 22.37722210974017},
		{74.90821838378906, 22.37388863821397},
		{74.92504119873047, 22.369920115637292},
		{74.92864608764648, 22.355632497760894},
		{74.93207931518555, 22.370396344320053},
		{74.94855880737305, 22.3743648533201},
		{74.93431091308592, 22.376428433285266},
	}}})

	indexShape("envelope1", "envelope", [][][][]float64{{{
		{74.86736297607422, 22.307361269208684},
		{74.87028121948242, 22.345471522338478},
	}}})

	indexShape("envelope2", "envelope", [][][][]float64{{{
		{36.23007774353027, 50.01810835593541},
		{36.25333786010742, 50.03068093791795},
	}}})

	indexCircle("circle1", []float64{74.93671417236328, 22.308314152382284}, "300m")

	indexCircle("circle2", []float64{36.22243881225586, 50.02941280037234}, "600m")

	indexShape("linestring1", "linestring", [][][][]float64{{{
		{74.92697238922119, 22.320343743143248},
		{74.94036197662354, 22.32054224254707},
	}}})

	indexShape("linestring2", "linestring", [][][][]float64{{{
		{77.60188579559325, 12.982604078764705},
		{77.60557651519775, 12.987329508048184},
	}}})

	indexShape("multilinestring1", "multilinestring", [][][][]float64{{
		{
			{74.92203712463379, 22.3113315728684},
			{74.92323875427246, 22.307798008137024},
		},
		{{74.92405414581299, 22.307559787072712}, {74.92735862731934, 22.310021385140573}},
		{{74.9223804473877, 22.311688894660474}, {74.92534160614014, 22.30930673210729}},
	}})

	indexShape("multilinestring2", "multilinestring", [][][][]float64{{
		{
			{77.6015853881836, 12.990089451715061},
			{77.60476112365723, 12.987747683302153},
		},
		{{77.59875297546387, 12.988751301039581}, {77.59446144104004, 12.98197680263484}},
		{{77.60188579559325, 12.982604078764705}, {77.60557651519775, 12.987329508048184}},
	}})

	indexShape("multipoint1", "multipoint", [][][][]float64{{{
		{77.56618022918701, 12.958180959662695},
		{77.56407737731932, 12.951614746607163},
		{77.56922721862793, 12.956173473406446},
	}}})

	// Second ring is the hole.
	indexShape("polygonWithHole1", "polygon", [][][][]float64{{
		{
			{77.59991168975829, 12.972232910164502},
			{77.6039457321167, 12.97582941279006},
			{77.60424613952637, 12.98168407323241},
			{77.59974002838135, 12.985489528568463},
			{77.59321689605713, 12.979300406693417},
			{77.59991168975829, 12.972232910164502},
		},
		{
			{77.59682178497314, 12.975787593290978},
			{77.60295867919922, 12.975787593290978},
			{77.60295867919922, 12.98143316204164},
			{77.59682178497314, 12.98143316204164},
			{77.59682178497314, 12.975787593290978},
		},
	}})

	// Second ring is the hole.
	indexShape("polygonWithHole2", "polygon", [][][][]float64{{
		{
			{36.261234283447266, 49.90712870720605},
			{36.2479305267334, 49.89480027061714},
			{36.254539489746094, 49.883408870659736},
			{36.280717849731445, 49.883408870659736},
			{36.28741264343262, 49.890432041848264},
			{36.27788543701172, 49.90276159448742},
			{36.261234283447266, 49.90712870720605},
		},
		{
			{36.264581680297844, 49.905249238801304},
			{36.25368118286133, 49.89673543545543},
			{36.253509521484375, 49.88578690918283},
			{36.270332336425774, 49.886174020645804},
			{36.27127647399902, 49.89579550794111},
			{36.264581680297844, 49.905249238801304},
		},
	}})

	indexShape("multipolygon1", "multipolygon", [][][][]float64{
		{{
			{36.1875057220459, 50.04363607656457},
			{36.192398071289055, 50.034871067327856},
			{36.20218276977539, 50.03955696315653},
			{36.1875057220459, 50.04363607656457},
		}}, // polygon1
		{{
			{36.2123966217041, 50.03795829715335},
			{36.218318939208984, 50.0333273779768},
			{36.226558685302734, 50.03867494711694},
			{36.217031478881836, 50.04286437899031},
			{36.2123966217041, 50.03795829715335},
		}}, // polygon2
		{{
			{36.221065521240234, 50.00365685169585},
			{36.226301193237305, 49.998029518286025},
			{36.23342514038086, 49.9995743420677},
			{36.23531341552734, 50.002994846659156},
			{36.231021881103516, 50.00630478067617},
			{36.22810363769531, 50.00663576154257},
			{36.226043701171875, 50.004815338573046},
			{36.221065521240234, 50.00365685169585},
		}},
	})

	// Geometry collection made of one polygon and one multipolygon.
	polygonInGc := [][][][]float64{{{
		{36.1875057220459, 50.04363607656457},
		{36.192398071289055, 50.034871067327856},
		{36.20218276977539, 50.03955696315653},
		{36.1875057220459, 50.04363607656457},
	}}}
	multipolygonInGc := [][][][]float64{
		{{
			{36.29015922546387, 49.980150089789376},
			{36.28337860107422, 49.961656654293485},
			{36.307411193847656, 49.96033147865059},
			{36.29015922546387, 49.980150089789376},
		}}, // polygon1
		{{
			{36.16106986999512, 50.00387751801547},
			{36.161842346191406, 49.9908012905034},
			{36.17900848388672, 49.99841572888488},
			{36.16106986999512, 50.00387751801547},
		}},
	}
	coordinates := [][][][][]float64{polygonInGc, multipolygonInGc}
	types := []string{"polygon", "multipolygon"}
	gcDoc := document.NewDocument("gc_polygonInGc_multipolygonInGc")
	gcDoc.AddField(document.NewGeometryCollectionFieldWithIndexingOptions("geometry",
		[]uint64{}, coordinates, types,
		document.DefaultGeoShapeIndexingOptions))
	indexDoc(gcDoc)

	return i
}
================================================
FILE: search/searcher/search_geoshape_points_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoJsonPointContainsQuery runs "contains" geoshape searches with single
// point query shapes against the index built by setupGeoJsonShapesIndex and
// compares the external IDs of the matched documents with the expected list.
func TestGeoJsonPointContainsQuery(t *testing.T) {
	tests := []struct {
		point []float64
		field string
		want  []string
	}{
		// test points inside the polygon1.
		{
			[]float64{77.58334636688232, 12.948268838994263},
			"geometry",
			[]string{"polygon1"},
		},
		// test points inside the circle1.
		{
			[]float64{77.58553504943848, 12.954040501528555},
			"geometry",
			[]string{"circle1"},
		},
		// test points inside the polygon1 and the circle.
		{
			[]float64{77.59293794631958, 12.948896200093982},
			"geometry",
			[]string{"polygon1", "circle1"},
		},
		// test points outside the polygon1 and the circle1.
		{
			[]float64{77.5614595413208, 12.953287683563568},
			"geometry", nil,
		},
		// test point within the envelope1.
		{
			[]float64{81.28166198730469, 26.34203746601541},
			"geometry",
			[]string{"envelope1"},
		},
		// test point on the linestring vertex.
		{
			[]float64{77.57776737213135, 12.952074805390097},
			"geometry",
			[]string{"linestring1"},
		},
		// test point on the multilinestring vertex.
		{
			[]float64{77.5779390335083, 12.945006535817749},
			"geometry",
			[]string{"multilinestring1"},
		},
		// test point on the multipoint vertex.
		{
			[]float64{77.56407737731932, 12.951614746607163},
			"geometry",
			[]string{"multipoint1"},
		},
		// test point within the polygonWithHole1.
		{
			[]float64{77.60334491729736, 12.979844051951334},
			"geometry",
			[]string{"polygonWithHole1"},
		},
		// test point within the hole of the polygonWithHole1.
		{
			[]float64{77.60244369506836, 12.976247607394027},
			"geometry", nil,
		},
	}

	i := setupGeoJsonShapesIndex(t)
	// Registered first so it runs last, i.e. after the reader has been closed.
	defer func() {
		if err := i.Close(); err != nil {
			t.Error(err)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: every step below needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		if err := indexReader.Close(); err != nil {
			t.Error(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePointRelationQuery("contains",
			false, indexReader, [][]float64{test.point}, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for point: %+v",
				n, test.want, got, test.point)
		}
	}
}
// TestGeoJsonMultiPointWithInQuery runs geoshape searches with multipoint
// query shapes, using the "contains" relation, against the index built by
// setupGeoJsonShapesIndex and compares the external IDs of the matched
// documents with the expected list.
func TestGeoJsonMultiPointWithInQuery(t *testing.T) {
	tests := []struct {
		multipoint [][]float64
		field      string
		want       []string
	}{
		// test multipoint inside the polygon1.
		{
			[][]float64{
				{77.58334636688232, 12.948268838994263},
				{77.58467674255371, 12.944295515355652},
			},
			"geometry",
			[]string{"polygon1"},
		},
		// test multipoint inside the circle1.
		{
			[][]float64{
				{77.58553504943848, 12.954040501528555},
				{77.58643627166747, 12.956089827794571},
			},
			"geometry",
			[]string{"circle1"},
		},
		// test multipoint inside the envelope1.
		{
			[][]float64{
				{81.28166198730469, 26.34203746601541},
				{80.94314575195312, 26.346960121309415},
			},
			"geometry",
			[]string{"envelope1"},
		},
		// test multipoint inside the polygon1 and the circle.
		{
			[][]float64{
				{77.59293794631958, 12.948896200093982},
				{77.58532047271729, 12.953789562459688},
			},
			"geometry",
			[]string{"polygon1", "circle1"},
		},
		// test multipoint (only 1 point outside) outside.
		{
			[][]float64{
				{77.58334636688232, 12.948268838994263},
				{77.58643627166747, 12.956089827794571},
				{77.5615, 12.9533},
			},
			"geometry", nil,
		},
		// test multipoint on the linestring vertex.
		{
			[][]float64{
				{77.5841188430786, 12.957093573282744},
				{77.57776737213135, 12.952074805390097},
			},
			"geometry",
			[]string{"linestring1"},
		},
		// test multipoint outside the linestring vertex.
		{
			[][]float64{
				{77.5841188430786, 12.957093573282744},
				{77.57776737213135, 12.952074805390097},
				{77.58334636688232, 12.948268838994263},
			},
			"geometry", nil,
		},
		// test multipoint on the multilinestring vertex.
		{
			[][]float64{
				{77.5779390335083, 12.94471376293191},
				{77.57218837738037, 12.948268838994263},
			},
			"geometry",
			[]string{"multilinestring1"},
		},
		// test multipoint outside the multilinestring vertex.
		{
			[][]float64{
				{77.5779390335083, 12.94471376293191},
				{77.57218837738037, 12.948268838994263},
				{77.58532047271729, 12.953789562459688},
			},
			"geometry", nil,
		},
		// test multipoint with one inside the hole within the polygonWithHole1.
		{
			[][]float64{
				{77.60334491729736, 12.979844051951334},
				{77.60244369506836, 12.976247607394027},
			},
			"geometry", nil,
		},
		// test multipoint with all inside the hole within the polygonWithHole1.
		{
			[][]float64{
				{77.59656429290771, 12.981767710239714},
				{77.59888172149658, 12.979969508380469},
			},
			"geometry", nil,
		},
		// test multipoint with all inside the polygonWithHole1.
		{
			[][]float64{
				{77.60334491729736, 12.979844051951334},
				{77.59656429290771, 12.981767710239714},
				{77.59802341461182, 12.9751602999608},
			},
			"geometry",
			[]string{"polygonWithHole1"},
		},
	}

	i := setupGeoJsonShapesIndex(t)
	// Registered first so it runs last, i.e. after the reader has been closed.
	defer func() {
		if err := i.Close(); err != nil {
			t.Error(err)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: every step below needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		if err := indexReader.Close(); err != nil {
			t.Error(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePointRelationQuery("contains",
			true, indexReader, test.multipoint, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for multipoint: %+v",
				n, test.want, got, test.multipoint)
		}
	}
}
// TestGeoJsonMultiPointIntersectsQuery runs "intersects" geoshape searches
// with multipoint query shapes against the index built by
// setupGeoJsonShapesIndex and compares the external IDs of the matched
// documents with the expected list.
func TestGeoJsonMultiPointIntersectsQuery(t *testing.T) {
	tests := []struct {
		multipoint [][]float64
		field      string
		want       []string
	}{
		// test multipoint inside the polygon1.
		{
			[][]float64{
				{77.58334636688232, 12.948268838994263},
				{77.58467674255371, 12.944295515355652},
			},
			"geometry",
			[]string{"polygon1"},
		},
		// test multipoint inside the circle1.
		{
			[][]float64{
				{77.58553504943848, 12.954040501528555},
				{77.58643627166747, 12.956089827794571},
			},
			"geometry",
			[]string{"circle1"},
		},
		// test multipoint inside the envelope1. (1 point outside)
		{
			[][]float64{
				{81.28166198730469, 26.34203746601541},
				{80.94314575195312, 26.346960121309415},
				{81.12716674804688, 26.353728430338332},
			},
			"geometry",
			[]string{"envelope1"},
		},
		// test multipoint inside the polygon1 and the circle.
		{
			[][]float64{
				{77.59293794631958, 12.948896200093982},
				{77.58532047271729, 12.953789562459688},
			},
			"geometry",
			[]string{"polygon1", "circle1"},
		},
		// test multipoint (only 1 point outside) intersects.
		{
			[][]float64{
				{77.58334636688232, 12.948268838994263},
				{77.58643627166747, 12.956089827794571},
				{77.5615, 12.9533},
			},
			"geometry",
			[]string{"polygon1", "circle1"},
		},
		// test multipoint on the linestring vertex.
		{
			[][]float64{
				{77.5841188430786, 12.957093573282744},
				{77.57776737213135, 12.952074805390097},
			},
			"geometry",
			[]string{"linestring1"},
		},
		// test multipoint outside the linestring vertex.
		{
			[][]float64{
				{77.5841188430786, 12.957093573282744},
				{77.57776737213135, 12.952074805390097},
				{77.58334636688232, 12.948268838994263},
			},
			"geometry",
			[]string{"polygon1", "linestring1"},
		},
		// test multipoint on the multilinestring vertex.
		{
			[][]float64{
				{77.5779390335083, 12.94471376293191},
				{77.57218837738037, 12.948268838994263},
			},
			"geometry",
			[]string{"multilinestring1"},
		},
		// test multipoint outside the multilinestring vertex.
		{
			[][]float64{
				{77.5779390335083, 12.94471376293191},
				{77.57218837738037, 12.948268838994263},
				{77.58532047271729, 12.953789562459688},
			},
			"geometry",
			[]string{"polygon1", "circle1", "multilinestring1"},
		},
		// test multipoint with one inside the hole within the polygonWithHole1.
		{
			[][]float64{
				{77.60334491729736, 12.979844051951334},
				{77.60244369506836, 12.976247607394027},
			},
			"geometry",
			[]string{"polygonWithHole1"},
		},
		// test multipoint with all inside the hole within the polygonWithHole1.
		{
			[][]float64{
				{77.60244369506836, 12.976247607394027},
				{77.59888172149658, 12.979969508380469},
			},
			"geometry", nil,
		},
		// test multipoint with all inside the polygonWithHole1.
		{
			[][]float64{
				{77.60334491729736, 12.979844051951334},
				{77.59656429290771, 12.981767710239714},
				{77.59802341461182, 12.9751602999608},
			},
			"geometry",
			[]string{"polygonWithHole1"},
		},
	}

	i := setupGeoJsonShapesIndex(t)
	// Registered first so it runs last, i.e. after the reader has been closed.
	defer func() {
		if err := i.Close(); err != nil {
			t.Error(err)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: every step below needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		if err := indexReader.Close(); err != nil {
			t.Error(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePointRelationQuery("intersects",
			true, indexReader, test.multipoint, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for multipoint: %+v",
				n, test.want, got, test.multipoint)
		}
	}
}
// runGeoShapePointRelationQuery searches field with a point-based query shape
// and returns the external IDs of the matched documents in the order the
// searcher produced them.
//
// When multi is true all of points form a GeoJSON multipoint; otherwise only
// points[0] is used as a single GeoJSON point, so points must be non-empty in
// that mode.
//
// The result is nil when nothing matched. Any error from constructing the
// searcher, advancing it, resolving an external ID, or closing the searcher
// is returned with a nil result.
func runGeoShapePointRelationQuery(relation string, multi bool,
	i index.IndexReader, points [][]float64, field string,
) (rv []string, err error) {
	var s index.GeoJSON
	if multi {
		s = geo.NewGeoJsonMultiPoint(points)
	} else {
		s = geo.NewGeoJsonPoint(points[0])
	}

	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	// Release the searcher when done; surface a close failure only if
	// nothing else has already failed.
	defer func() {
		if cerr := gbs.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}

	docMatch, err := gbs.Next(ctx)
	for docMatch != nil && err == nil {
		var docID string
		docID, err = i.ExternalID(docMatch.IndexInternalID)
		if err != nil {
			// Do not record an ID that could not be resolved.
			return nil, err
		}
		rv = append(rv, docID)
		docMatch, err = gbs.Next(ctx)
	}
	if err != nil {
		return nil, err
	}
	return rv, nil
}
// Fatalfable is satisfied by any value with a Fatalf method of this shape;
// *testing.T provides one.
// NOTE(review): no use of this interface is visible in this part of the
// file — confirm it is still referenced elsewhere before relying on it.
type Fatalfable interface {
	// Fatalf reports a failure described by format and args.
	Fatalf(format string, args ...interface{})
}
// setupGeoJsonShapesIndex creates and opens a scorch index (empty "path",
// "s2" spatial plugin) and indexes a fixed set of geo shapes under the
// "geometry" field, one document per shape. Any failure aborts the calling
// test via t.Fatal. The opened index is returned; the caller owns it.
//
// Documents are indexed in a fixed order; keep it stable, since the tests
// compare result slices with reflect.DeepEqual.
func setupGeoJsonShapesIndex(t *testing.T) index.Index {
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = i.Open()
	if err != nil {
		t.Fatal(err)
	}

	// indexDoc writes doc to the index and aborts the test on failure.
	indexDoc := func(doc *document.Document) {
		t.Helper()
		if err := i.Update(doc); err != nil {
			t.Fatal(err)
		}
	}
	// indexShape indexes a document named id holding one geo shape field.
	indexShape := func(id, shapeType string, coords [][][][]float64) {
		t.Helper()
		doc := document.NewDocument(id)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coords, shapeType, document.DefaultGeoShapeIndexingOptions))
		indexDoc(doc)
	}

	indexShape("polygon1", "polygon", [][][][]float64{{{
		{77.5853419303894, 12.953977766785052},
		{77.58405447006226, 12.95393594361393},
		{77.5819730758667, 12.9495026476557},
		{77.58068561553955, 12.94883346405509},
		{77.58019208908081, 12.948331575175299},
		{77.57991313934326, 12.943814529775414},
		{77.58497714996338, 12.94394000436408},
		{77.58517026901245, 12.9446301134728},
		{77.58572816848755, 12.945508431393435},
		{77.58785247802734, 12.946365833997325},
		{77.58967638015747, 12.946428570657417},
		{77.59070634841918, 12.947474179333993},
		{77.59317398071289, 12.948875288082773},
		{77.59167194366454, 12.949962710338657},
		{77.59077072143555, 12.950276388953625},
		{77.59098529815674, 12.951196510612728},
		{77.58729457855225, 12.952472128200755},
		{77.5853419303894, 12.953977766785052},
	}}})

	// not working envelope
	indexShape("envelope1", "envelope", [][][][]float64{{{
		{80.93696594238281, 26.33957605983274},
		{81.28440856933594, 26.351267272877074},
	}}})

	circle := document.NewDocument("circle1")
	circle.AddField(document.NewGeoCircleFieldWithIndexingOptions("geometry", []uint64{},
		[]float64{77.59137153625487, 12.952660333521468}, "900m",
		document.DefaultGeoShapeIndexingOptions))
	indexDoc(circle)

	indexShape("linestring1", "linestring", [][][][]float64{{{
		{77.5841188430786, 12.957093573282744},
		{77.57776737213135, 12.952074805390097},
	}}})

	indexShape("multilinestring1", "multilinestring", [][][][]float64{{{
		{77.57227420806883, 12.948687079902895},
		{77.57600784301758, 12.954165970968194},
		{77.5779390335083, 12.94471376293191},
		{77.57218837738037, 12.948268838994263},
		{77.57781028747559, 12.951740217268595},
		{77.5779390335083, 12.945006535817749},
	}}})

	indexShape("multipoint1", "multipoint", [][][][]float64{{{
		{77.56618022918701, 12.958180959662695},
		{77.56407737731932, 12.951614746607163},
		{77.56922721862793, 12.956173473406446},
	}}})

	// Second ring is the hole.
	indexShape("polygonWithHole1", "polygon", [][][][]float64{{
		{
			{77.59991168975829, 12.972232910164502},
			{77.6039457321167, 12.97582941279006},
			{77.60424613952637, 12.98168407323241},
			{77.59974002838135, 12.985489528568463},
			{77.59321689605713, 12.979300406693417},
			{77.59991168975829, 12.972232910164502},
		},
		{
			{77.59682178497314, 12.975787593290978},
			{77.60295867919922, 12.975787593290978},
			{77.60295867919922, 12.98143316204164},
			{77.59682178497314, 12.98143316204164},
			{77.59682178497314, 12.975787593290978},
		},
	}})

	return i
}
================================================
FILE: search/searcher/search_geoshape_polygon_test.go
================================================
// Copyright (c) 2022 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestGeoJsonPolygonIntersectsQuery runs "intersects" geoshape searches with
// polygon query shapes against the index built by
// setupGeoJsonShapesIndexForPolygonQuery and compares the external IDs of the
// matched documents with the expected list.
func TestGeoJsonPolygonIntersectsQuery(t *testing.T) {
	tests := []struct {
		polygon [][][]float64
		field   string
		want    []string
	}{
		// test intersecting query polygon for polygon1.
		{
			[][][]float64{{
				{77.57926940917969, 12.945257483731918},
				{77.57875442504883, 12.942036966318216},
				{77.58278846740721, 12.9424970427816},
				{77.57926940917969, 12.945257483731918},
			}},
			"geometry",
			[]string{"polygon1"},
		},
		// test intersecting query polygon for polygon1, polygon2, circle1.
		{
			[][][]float64{{
				{77.59562015533446, 12.94099133483504},
				{77.59665012359619, 12.949356263896634},
				{77.59313106536865, 12.951321981484776},
				{77.59085655212402, 12.948477959536318},
				{77.59562015533446, 12.94099133483504},
			}},
			"geometry",
			[]string{"polygon1", "polygon2", "circle1"},
		},
		// test intersecting query polygon for polygon1, polygon2 and polygon3.
		{
			[][][]float64{{
				{77.5929594039917, 12.939151012774925},
				{77.58321762084961, 12.94546660680072},
				{77.59737968444824, 12.931998723107322},
				{77.60111331939697, 12.955169724209911},
				{77.59592056274414, 12.936265025833965},
				{77.5929594039917, 12.939151012774925},
			}},
			"geometry",
			[]string{"polygon1", "polygon2", "polygon3"},
		},
		// test intersecting query polygon for polygon2 and the circle1.
		{
			[][][]float64{{
				{77.59012699127197, 12.959853852513307},
				{77.59836673736572, 12.959853852513307},
				{77.59836673736572, 12.965541604118611},
				{77.59012699127197, 12.965541604118611},
				{77.59012699127197, 12.959853852513307},
			}},
			"geometry",
			[]string{"polygon2", "circle1"},
		},
		// test intersecting query polygon for linestring2 and multilinestring2.
		{
			[][][]float64{{
				{77.59669303894043, 12.989504011681609},
				{77.60699272155762, 12.983231353311314},
				{77.60115623474121, 12.993183897537897},
				{77.59669303894043, 12.989504011681609},
			}},
			"geometry",
			[]string{"linestring2", "multilinestring2"},
		},
		// test intersecting query polygon for multilinestring2.
		{
			[][][]float64{{
				{77.60124206542969, 12.987162237749484},
				{77.60330200195312, 12.992849364713313},
				{77.59514808654785, 12.989671280403403},
				{77.60124206542969, 12.987162237749484},
			}},
			"geometry",
			[]string{"multilinestring2"},
		},
		// test intersecting query polygon for multipoint1.
		{
			[][][]float64{{
				{77.56648063659668, 12.956382587313202},
				{77.56819725036621, 12.949523559614263},
				{77.5718879699707, 12.958222782120954},
				{77.56648063659668, 12.956382587313202},
			}},
			"geometry",
			[]string{"multipoint1"},
		},
		// test intersecting query polygon for envelope1.
		{
			[][][]float64{{
				{36.19986534118652, 50.00034673534484},
				{36.19351387023926, 50.00464984215712},
				{36.178321838378906, 49.991573824716205},
				{36.19986534118652, 50.00034673534484},
			}},
			"geometry",
			[]string{"envelope1"},
		},
		// test intersecting query polygon for envelope1.
		{
			[][][]float64{{
				{36.170082092285156, 49.99229116680205},
				{36.14982604980469, 49.99002874388075},
				{36.227073669433594, 49.98754547425633},
				{36.170082092285156, 49.99229116680205},
			}},
			"geometry",
			[]string{"envelope1"},
		},
	}

	i := setupGeoJsonShapesIndexForPolygonQuery(t)
	// Registered first so it runs last, i.e. after the reader has been closed.
	defer func() {
		if err := i.Close(); err != nil {
			t.Error(err)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: every step below needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		if err := indexReader.Close(); err != nil {
			t.Error(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePolygonQueryWithRelation("intersects",
			indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for polygon: %+v", n,
				test.want, got, test.polygon)
		}
	}
}
// TestGeoJsonPolygonContainsQuery runs "contains" geoshape searches with
// polygon query shapes against the index built by
// setupGeoJsonShapesIndexForPolygonQuery and compares the external IDs of the
// matched documents with the expected list.
func TestGeoJsonPolygonContainsQuery(t *testing.T) {
	tests := []struct {
		polygon [][][]float64
		field   string
		want    []string
	}{
		// test containment query polygon for polygon1.
		{
			[][][]float64{{
				{77.5843334197998, 12.952702156906767},
				{77.58510589599608, 12.952702156906767},
				{77.58510589599608, 12.953622269606669},
				{77.5843334197998, 12.953622269606669},
				{77.5843334197998, 12.952702156906767},
			}},
			"geometry",
			[]string{"polygon1"},
		},
		// test containment query polygon for circle1.
		{
			[][][]float64{{
				{77.59025573730469, 12.953810474058429},
				{77.59145736694336, 12.953810474058429},
				{77.59145736694336, 12.954918786278716},
				{77.59025573730469, 12.954918786278716},
				{77.59025573730469, 12.953810474058429},
			}},
			"geometry",
			[]string{"circle1"},
		},
		// test containment query polygon for polygon2, polygon3.
		{
			[][][]float64{{
				{77.60235786437988, 12.956884459972992},
				{77.60124206542969, 12.956800814599926},
				{77.6008129119873, 12.955713422193524},
				{77.60244369506836, 12.955211547173878},
				{77.60313034057617, 12.955880713641998},
				{77.60235786437988, 12.956884459972992},
			}},
			"geometry",
			[]string{"polygon2", "polygon3"},
		},
		// test containment query polygon which resides within a hole in polygonWithHole1.
		{
			[][][]float64{{
				{77.60012626647949, 12.97963495776207},
				{77.5978946685791, 12.978213112610835},
				{77.60089874267577, 12.977962197916442},
				{77.60012626647949, 12.97963495776207},
			}},
			"geometry", nil,
		},
		// test containment query polygon which resides within polygonWithHole1.
		{
			[][][]float64{{
				{77.59978294372559, 12.984067716910454},
				{77.59780883789062, 12.982227713276774},
				{77.60089874267577, 12.982227713276774},
				{77.59978294372559, 12.984067716910454},
			}},
			"geometry",
			[]string{"polygonWithHole1"},
		},
		// test with query polygon for polygon4 with a single vertex lying outside.
		{
			[][][]float64{{
				{-121.48138761520384, 38.50964107572585},
				{-121.48226737976073, 38.509238097766875},
				{-121.48115158081055, 38.50781086602439},
				{-121.48014307022095, 38.50806273250507},
				{-121.48138761520384, 38.50964107572585},
			}},
			"geometry", nil,
		},
		// test with query polygon for polygon4.
		{
			[][][]float64{{
				{-121.48381233215332, 38.507974579337045},
				{-121.48361384868622, 38.507869634948676},
				{-121.48361921310425, 38.50765135013098},
				{-121.48343682289122, 38.50797038156446},
				{-121.48381233215332, 38.507974579337045},
			}},
			"geometry",
			[]string{"polygon4"},
		},
		// test with query polygon for multipolygon1.
		{
			[][][]float64{{
				{-121.47578716278075, 38.51617236229197},
				{-121.47578716278075, 38.51566868518406},
				{-121.47546529769896, 38.516105205547866},
				{-121.47578716278075, 38.51617236229197},
			}},
			"geometry",
			[]string{"multipolygon1"},
		},
		// test with query polygon for envelope1.
		{
			[][][]float64{{
				{36.197547912597656, 49.99642946989866},
				{36.18939399719238, 49.988649165474},
				{36.20201110839844, 49.98853879749191},
				{36.1970329284668, 49.980150089789376},
				{36.205787658691406, 49.9885939815146},
				{36.197547912597656, 49.99642946989866},
			}},
			"geometry",
			[]string{"envelope1"},
		},
		// test with query polygon for no hits. (envelope1 has one vertex outside the polygon)
		{
			[][][]float64{{
				{36.19832038879394, 49.99626394461266},
				{36.19016647338867, 49.98439981533724},
				{36.20698928833008, 49.98158510403259},
				{36.19832038879394, 49.99626394461266},
			}},
			"geometry", nil,
		},
	}

	i := setupGeoJsonShapesIndexForPolygonQuery(t)
	// Registered first so it runs last, i.e. after the reader has been closed.
	defer func() {
		if err := i.Close(); err != nil {
			t.Error(err)
		}
	}()

	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: every step below needs a usable reader.
		t.Fatal(err)
	}
	defer func() {
		if err := indexReader.Close(); err != nil {
			t.Error(err)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePolygonQueryWithRelation("contains",
			indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for polygon: %+v",
				n, test.want, got, test.polygon)
		}
	}
}
// TestGeoJsonPolygonWithInQuery runs "within" polygon queries against the
// index built by setupGeoJsonShapesIndexForPolygonQuery and compares the
// returned external document IDs (order-sensitive) with the expectation of
// each table entry.
func TestGeoJsonPolygonWithInQuery(t *testing.T) {
	tests := []struct {
		polygon [][][]float64
		field   string
		want    []string
	}{
		// test with query polygon for polygon1.
		{
			[][][]float64{{
				{77.58407592773438, 12.956382587313202},
				{77.57746696472168, 12.943249893344905},
				{77.5920581817627, 12.944086391304364},
				{77.59454727172852, 12.95353862313803},
				{77.58407592773438, 12.956382587313202},
			}},
			"geometry",
			[]string{"polygon1"},
		},
		// test with query polygon for circle1 and polygon3.
		{
			[][][]float64{{
				{77.59248733520508, 12.967841760870071},
				{77.58261680603027, 12.968594534825176},
				{77.57789611816406, 12.957302686416881},
				{77.58896827697754, 12.945341132980488},
				{77.60450363159178, 12.947599652080394},
				{77.60673522949219, 12.96483064227584},
				{77.59248733520508, 12.967841760870071},
			}},
			"geometry",
			[]string{"polygon3", "circle1"},
		},
		// test with query polygon for linestring2, multilinestring2.
		{
			[][][]float64{{
				{77.59909629821777, 12.998118204343788},
				{77.58931159973145, 12.978882217224443},
				{77.61128425598145, 12.983565899088745},
				{77.59909629821777, 12.998118204343788},
			}},
			"geometry",
			[]string{"linestring2", "multilinestring2"},
		},
		// test with query polygon for multipoint1.
		{[][][]float64{{
			{77.55703926086426, 12.964245142762644},
			{77.5631332397461, 12.944253690559432},
			{77.57429122924805, 12.957720912158363},
			{77.55703926086426, 12.964245142762644},
		}}, "geometry", []string{"multipoint1"}},
		// test with query polygon with no results.
		// (polygon4 has one vertex lying outside the query polygon).
		{
			[][][]float64{{
				{-121.48812532424927, 38.51058134885975},
				{-121.48258924484252, 38.500153704565065},
				{-121.47492885589598, 38.50799556819636},
				{-121.48630142211913, 38.51147123890908},
				{-121.48812532424927, 38.51058134885975},
			}},
			"geometry", nil,
		},
		// test with query polygon for polygon4.
		{[][][]float64{{
			{-121.48366212844849, 38.510161585585045},
			{-121.48533582687377, 38.50841534409804},
			{-121.48376941680908, 38.507777283760426},
			{-121.48370504379272, 38.50250467407243},
			{-121.48010015487672, 38.50253825879518},
			{-121.48018598556519, 38.504502937819765},
			{-121.47756814956665, 38.50755899866278},
			{-121.48113012313843, 38.50866720846446},
			{-121.48115158081055, 38.51017837616302},
			{-121.48366212844849, 38.510161585585045},
		}}, "geometry", []string{"polygon4"}},
		// test with query polygon for envelope1.
		{
			[][][]float64{{
				{36.20587348937988, 50.00470500769241},
				{36.17969512939453, 49.993946530777606},
				{36.19368553161621, 49.971870325635074},
				{36.21119499206543, 49.983075265826656},
				{36.20587348937988, 50.00470500769241},
			}},
			"geometry",
			[]string{"envelope1"},
		},
		// test with query polygon for linestring2 which lies outside except the endpoints.
		{
			[][][]float64{{
				{8.515305519104004, 47.392597129887},
				{8.514232635498047, 47.38896544894171},
				{8.507537841796875, 47.38815191810328},
				{8.514318466186523, 47.38725120859953},
				{8.516035079956053, 47.383357642070706},
				{8.516979217529295, 47.38733837470806},
				{8.522472381591797, 47.38794853343167},
				{8.516507148742676, 47.388994503382285},
				{8.515305519104004, 47.392597129887},
			}},
			"geometry", nil,
		},
		// test with query polygon for all the shapes.
		{
			[][][]float64{{
				{-135.0, -38.0},
				{149.0, -38.0},
				{149.0, 77.0},
				{-135.0, 77.0},
			}},
			"geometry",
			[]string{
				"polygon1", "polygon2", "polygon3", "envelope1", "circle1", "linestring1",
				"linestring2", "linestring3", "multilinestring1", "multilinestring2", "multipoint1",
				"polygonWithHole1", "polygon4", "multipolygon1",
			},
		},
	}

	i := setupGeoJsonShapesIndexForPolygonQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: without a reader every statement below
		// would dereference nil.
		t.Fatal(err)
	}
	defer func() {
		if cerr := indexReader.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePolygonQueryWithRelation("within",
			indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for polygon: %+v",
				n, test.want, got, test.polygon)
		}
	}
}
// runGeoShapePolygonQueryWithRelation builds a GeoJSON polygon from points,
// searches field of index reader i for shapes standing in the given relation
// to it, and returns the external IDs of every match in the order the
// searcher produced them. A nil slice is returned when nothing matches.
//
// The searcher is closed before returning, and a failure to resolve an
// external ID is reported instead of being silently dropped.
func runGeoShapePolygonQueryWithRelation(relation string, i index.IndexReader,
	points [][][]float64, field string,
) (rv []string, err error) {
	s := geo.NewGeoJsonPolygon(points)
	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation, field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	defer func() {
		// Surface a close failure only when nothing else went wrong.
		if cerr := gbs.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}

	var docMatch *search.DocumentMatch
	for {
		docMatch, err = gbs.Next(ctx)
		if err != nil {
			return nil, err
		}
		if docMatch == nil {
			return rv, nil
		}
		var docID string
		docID, err = i.ExternalID(docMatch.IndexInternalID)
		if err != nil {
			return nil, err
		}
		rv = append(rv, docID)
	}
}
// setupGeoJsonShapesIndexForPolygonQuery creates and opens a scorch index
// (empty "path", "s2" spatial plugin) and stores fourteen documents, each
// with one geo shape in the "geometry" field. The document IDs are, in
// insertion order: polygon1, polygon2, polygon3, envelope1, circle1,
// linestring1, linestring2, linestring3, multilinestring1, multilinestring2,
// multipoint1, polygonWithHole1, polygon4 and multipolygon1.
//
// Any failure aborts the calling test via t.Fatal.
func setupGeoJsonShapesIndexForPolygonQuery(t *testing.T) index.Index {
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	if err = i.Open(); err != nil {
		t.Fatal(err)
	}

	// addShape indexes one document whose "geometry" field holds the given
	// coordinates interpreted as shapeType.
	addShape := func(docID, shapeType string, coordinates [][][][]float64) {
		t.Helper()
		doc := document.NewDocument(docID)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coordinates, shapeType, document.DefaultGeoShapeIndexingOptions))
		if uerr := i.Update(doc); uerr != nil {
			t.Fatal(uerr)
		}
	}

	addShape("polygon1", "polygon", [][][][]float64{{{
		{77.5853419303894, 12.953977766785052},
		{77.58405447006226, 12.95393594361393},
		{77.5819730758667, 12.9495026476557},
		{77.58068561553955, 12.94883346405509},
		{77.58019208908081, 12.948331575175299},
		{77.57991313934326, 12.943814529775414},
		{77.58497714996338, 12.94394000436408},
		{77.58517026901245, 12.9446301134728},
		{77.58572816848755, 12.945508431393435},
		{77.58785247802734, 12.946365833997325},
		{77.58967638015747, 12.946428570657417},
		{77.59070634841918, 12.947474179333993},
		{77.59317398071289, 12.948875288082773},
		{77.59167194366454, 12.949962710338657},
		{77.59077072143555, 12.950276388953625},
		{77.59098529815674, 12.951196510612728},
		{77.58729457855225, 12.952472128200755},
		{77.5853419303894, 12.953977766785052},
	}}})

	addShape("polygon2", "polygon", [][][][]float64{{{
		{77.59527683258057, 12.951112863329588},
		{77.59420394897461, 12.947976069940545},
		{77.59579181671143, 12.946010325958518},
		{77.60347366333008, 12.950401860289055},
		{77.60673522949219, 12.95600618215462},
		{77.60107040405273, 12.96345053407734},
		{77.5984525680542, 12.961861309096507},
		{77.59527683258057, 12.951112863329588},
	}}})

	addShape("polygon3", "polygon", [][][][]float64{{{
		{77.59974002838135, 12.953789562459688},
		{77.60347366333008, 12.953789562459688},
		{77.60347366333008, 12.957720912158363},
		{77.59974002838135, 12.957720912158363},
		{77.59974002838135, 12.953789562459688},
	}}})

	// Disabled fixture: an earlier "polygon4" that was already commented out.
	// Kept for reference only; it is NOT indexed.
	//
	//	addShape("polygon4", "polygon", [][][][]float64{{{
	//		{8.515305519104004, 47.392597129887},
	//		{8.514232635498047, 47.38896544894171}, {8.507537841796875, 47.38815191810328},
	//		{8.514318466186523, 47.38725120859953}, {8.516035079956053, 47.383357642070706},
	//		{8.516979217529295, 47.38733837470806}, {8.522472381591797, 47.38794853343167},
	//		{8.516507148742676, 47.388994503382285}, {8.515305519104004, 47.392597129887},
	//	}}})

	// NOTE(review): the original author tagged this fixture "not working
	// envelope"; what exactly was not working is not recorded here.
	addShape("envelope1", "envelope", [][][][]float64{{{
		{36.18896484375, 49.9799293145682},
		{36.20613098144531, 49.99714673955337},
	}}})

	// The circle uses its own field constructor (centre point plus radius).
	circleDoc := document.NewDocument("circle1")
	circleDoc.AddField(document.NewGeoCircleFieldWithIndexingOptions("geometry",
		[]uint64{}, []float64{77.59253025054932, 12.955587953533424}, "900m",
		document.DefaultGeoShapeIndexingOptions))
	if err = i.Update(circleDoc); err != nil {
		t.Fatal(err)
	}

	addShape("linestring1", "linestring", [][][][]float64{{{
		{77.5841188430786, 12.957093573282744},
		{77.57776737213135, 12.952074805390097},
	}}})

	addShape("linestring2", "linestring", [][][][]float64{{{
		{77.60188579559325, 12.982604078764705},
		{77.60557651519775, 12.987329508048184},
	}}})

	addShape("linestring3", "linestring", [][][][]float64{{{
		{8.51539134979248, 47.390592472948434},
		{8.520884513854979, 47.388006643417924},
	}}})

	addShape("multilinestring1", "multilinestring", [][][][]float64{{
		{
			{77.57227420806883, 12.948687079902895},
			{77.57600784301758, 12.954165970968194},
		},
		{{77.5779390335083, 12.94471376293191}, {77.57218837738037, 12.948268838994263}},
		{{77.57781028747559, 12.951740217268595}, {77.5779390335083, 12.945006535817749}},
	}})

	addShape("multilinestring2", "multilinestring", [][][][]float64{{
		{
			{77.6015853881836, 12.990089451715061},
			{77.60476112365723, 12.987747683302153},
		},
		{{77.59875297546387, 12.988751301039581}, {77.59446144104004, 12.98197680263484}},
		{{77.60188579559325, 12.982604078764705}, {77.60557651519775, 12.987329508048184}},
	}})

	addShape("multipoint1", "multipoint", [][][][]float64{{{
		{77.56618022918701, 12.958180959662695},
		{77.56407737731932, 12.951614746607163},
		{77.56922721862793, 12.956173473406446},
	}}})

	// Outer ring followed by one inner ring (the hole).
	addShape("polygonWithHole1", "polygon", [][][][]float64{{
		{
			{77.59991168975829, 12.972232910164502},
			{77.6039457321167, 12.97582941279006},
			{77.60424613952637, 12.98168407323241},
			{77.59974002838135, 12.985489528568463},
			{77.59321689605713, 12.979300406693417},
			{77.59991168975829, 12.972232910164502},
		},
		{
			{77.59682178497314, 12.975787593290978},
			{77.60295867919922, 12.975787593290978},
			{77.60295867919922, 12.98143316204164},
			{77.59682178497314, 12.98143316204164},
			{77.59682178497314, 12.975787593290978},
		},
	}})

	addShape("polygon4", "polygon", [][][][]float64{{{
		{-121.48125886917113, 38.51009442323401},
		{-121.48361921310425, 38.51012800441735},
		{-121.48497104644774, 38.50858325377352},
		{-121.48366212844849, 38.507861239391026},
		{-121.48353338241577, 38.50277335141579},
		{-121.4803147315979, 38.50267259752949},
		{-121.48033618927, 38.5046204810195},
		{-121.47771835327147, 38.50754220747402},
		{-121.48123741149902, 38.508616835661655},
		{-121.48125886917113, 38.51009442323401},
	}}})

	addShape("multipolygon1", "multipolygon", [][][][]float64{
		{{
			{-121.49104356765746, 38.52149433504263},
			{-121.47857666015625, 38.51592052417851},
			{-121.47688150405884, 38.515970891871696},
			{-121.4770746231079, 38.51714612804143},
			{-121.49033546447754, 38.52221621271097},
			{-121.49104356765746, 38.52149433504263},
		}},
		{{
			{-121.47647380828859, 38.51714612804143},
			{-121.47658109664916, 38.51477884701455},
			{-121.4741563796997, 38.5159876810949},
			{-121.47647380828859, 38.51714612804143},
		}},
	})

	return i
}
// TestGeoJsonMultiPolygonWithInQuery runs "within" multipolygon queries
// against the index built by setupGeoJsonShapesIndexForMultiPolygonQuery and
// compares the returned external document IDs with each table entry.
func TestGeoJsonMultiPolygonWithInQuery(t *testing.T) {
	tests := []struct {
		polygon [][][][]float64
		field   string
		want    []string
	}{
		// test within multipolygon query for multipolygon1.
		// (where each query polygon contains each of the indexed polygons)
		{
			[][][][]float64{
				{
					{
						{-121.49458408355713, 38.53270780324851},
						{-121.48823261260985, 38.52533866992879},
						{-121.48048639297485, 38.53253994984147},
						{-121.49458408355713, 38.53270780324851},
					},
				},
				{{
					{-121.48700952529907, 38.53306029412857},
					{-121.48160219192505, 38.53306029412857},
					{-121.48160219192505, 38.53829709805414},
					{-121.48700952529907, 38.53829709805414},
					{-121.48700952529907, 38.53306029412857},
				}},
				{{
					{-121.47344827651976, 38.54475865436684},
					{-121.46396398544312, 38.54475865436684},
					{-121.46396398544312, 38.55366961462033},
					{-121.47344827651976, 38.55366961462033},
					{-121.47344827651976, 38.54475865436684},
				}},
			},
			"geometry",
			[]string{"multipolygon1"},
		},
		// test within multipolygon query. (only partial containment of the three
		// indexed polygons by the two query polygons)
		{
			[][][][]float64{
				{
					{
						{-121.49458408355713, 38.53270780324851},
						{-121.48823261260985, 38.52533866992879},
						{-121.48048639297485, 38.53253994984147},
						{-121.49458408355713, 38.53270780324851},
					},
				},
				{{
					{-121.48700952529907, 38.53306029412857},
					{-121.48160219192505, 38.53306029412857},
					{-121.48160219192505, 38.53829709805414},
					{-121.48700952529907, 38.53829709805414},
					{-121.48700952529907, 38.53306029412857},
				}},
				{{
					{-121.4734697341919, 38.544825784372485},
					{-121.4644145965576, 38.544825784372485},
					{-121.4644145965576, 38.5537199558913},
					{-121.4734697341919, 38.5537199558913},
					{-121.4734697341919, 38.544825784372485},
				}},
			},
			"geometry", nil,
		},
		// test within multipolygon query for multilinestring1.
		{[][][][]float64{
			{{
				{-121.49876832962036, 38.551739839324334},
				{-121.49814605712889, 38.54553064564853},
				{-121.49158000946044, 38.54908841140355},
				{-121.49876832962036, 38.551739839324334},
			}},
			{
				{
					{-121.49258852005006, 38.54294612052762},
					{-121.49117231369017, 38.54294612052762},
					{-121.49117231369017, 38.54526212788182},
					{-121.49258852005006, 38.54526212788182},
					{-121.49258852005006, 38.54294612052762},
				},
			},
		}, "geometry", []string{"multilinestring1"}},
		// test within multipolygon query for multipoint1.
		{[][][][]float64{
			{{
				{-121.50286674499512, 38.564810956372185},
				{-121.49694442749023, 38.56226068115802},
				{-121.48406982421875, 38.5675624676039},
				{-121.4875030517578, 38.57514535565976},
				{-121.50286674499512, 38.564810956372185},
			}},
			{{
				{-121.48685932159422, 38.565163289911425},
				{-121.48623704910278, 38.56283114531348},
				{-121.48357629776001, 38.565129734410704},
				{-121.48685932159422, 38.565163289911425},
			}},
			{
				{
					{-121.49430513381958, 38.56195866888961},
					{-121.4899492263794, 38.5584518779682},
					{-121.48842573165892, 38.56194189039304},
					{-121.49430513381958, 38.56195866888961},
				},
			},
		}, "geometry", []string{"multipoint1"}},
	}

	i := setupGeoJsonShapesIndexForMultiPolygonQuery(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: without a reader every statement below
		// would dereference nil.
		t.Fatal(err)
	}
	defer func() {
		if cerr := indexReader.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapeMultiPolygonQueryWithRelation("within",
			indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for polygon: %+v",
				n, test.want, got, test.polygon)
		}
	}
}
// runGeoShapeMultiPolygonQueryWithRelation builds a GeoJSON multipolygon from
// points, searches field of index reader i for shapes standing in the given
// relation to it, and returns the external IDs of every match in the order
// the searcher produced them. A nil slice is returned when nothing matches.
//
// The searcher is closed before returning, and a failure to resolve an
// external ID is reported instead of being silently dropped.
func runGeoShapeMultiPolygonQueryWithRelation(relation string,
	i index.IndexReader,
	points [][][][]float64, field string,
) (rv []string, err error) {
	s := geo.NewGeoJsonMultiPolygon(points)
	gbs, err := NewGeoShapeSearcher(context.TODO(), i, s, relation,
		field, 1.0, search.SearcherOptions{})
	if err != nil {
		return nil, err
	}
	defer func() {
		// Surface a close failure only when nothing else went wrong.
		if cerr := gbs.Close(); cerr != nil && err == nil {
			rv, err = nil, cerr
		}
	}()

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(gbs.DocumentMatchPoolSize(), 0),
	}

	var docMatch *search.DocumentMatch
	for {
		docMatch, err = gbs.Next(ctx)
		if err != nil {
			return nil, err
		}
		if docMatch == nil {
			return rv, nil
		}
		var docID string
		docID, err = i.ExternalID(docMatch.IndexInternalID)
		if err != nil {
			return nil, err
		}
		rv = append(rv, docID)
	}
}
// setupGeoJsonShapesIndexForMultiPolygonQuery creates and opens a scorch
// index (empty "path", "s2" spatial plugin) and stores three documents with a
// shape in the "geometry" field: multipolygon1, multilinestring1 and
// multipoint1, in that order.
//
// Any failure aborts the calling test via t.Fatal.
func setupGeoJsonShapesIndexForMultiPolygonQuery(t *testing.T) index.Index {
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	if err = i.Open(); err != nil {
		t.Fatal(err)
	}

	// addShape indexes one document whose "geometry" field holds the given
	// coordinates interpreted as shapeType.
	addShape := func(docID, shapeType string, coordinates [][][][]float64) {
		t.Helper()
		doc := document.NewDocument(docID)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coordinates, shapeType, document.DefaultGeoShapeIndexingOptions))
		if uerr := i.Update(doc); uerr != nil {
			t.Fatal(uerr)
		}
	}

	addShape("multipolygon1", "multipolygon", [][][][]float64{
		{{
			{-121.49140834808348, 38.5320028163074},
			{-121.49112939834593, 38.52916601331889},
			{-121.48889780044556, 38.52913244101627},
			{-121.4887261390686, 38.527655244193205},
			{-121.48559331893921, 38.52794061412457},
			{-121.48638725280762, 38.53213710006686},
			{-121.49140834808348, 38.5320028163074},
		}}, // polygon1
		{{
			{-121.48677349090575, 38.533194575914315},
			{-121.48179531097412, 38.533194575914315},
			{-121.48179531097412, 38.53814604174215},
			{-121.48677349090575, 38.53814604174215},
			{-121.48677349090575, 38.533194575914315},
		}}, // polygon2
		{{
			{-121.47334098815918, 38.553485029658475},
			{-121.47329807281494, 38.54485934935182},
			{-121.46415710449219, 38.54526212788182},
			{-121.47334098815918, 38.553485029658475},
		}}, // polygon3
	})

	addShape("multilinestring1", "multilinestring", [][][][]float64{{
		{
			{-121.4983820915222, 38.55081688500274},
			{-121.49649381637572, 38.550447699956685},
		},
		{{-121.49655818939209, 38.548635309508775}, {-121.49370431900023, 38.54811507788636}},
		{{-121.49134397506714, 38.54490969679143}, {-121.4919662475586, 38.54304681805045}},
	}})

	addShape("multipoint1", "multipoint", [][][][]float64{{{
		{-121.48960590362547, 38.56066671319285},
		{-121.4933180809021, 38.56157276247755},
		{-121.4973521232605, 38.56318348855919},
		{-121.48582935333252, 38.56736114108619},
		{-121.50104284286498, 38.56449217691959},
		{-121.4881682395935, 38.57158887950165},
	}}})

	return i
}
// setupGeoJsonPolygonS2LoopPortingIssue creates and opens a scorch index
// (empty "path", "s2" spatial plugin) holding a single document, "polygon1",
// whose "geometry" field is one very large polygon spanning longitudes -135
// to 149 and latitudes -38 to 77. The ring is given as four vertices without
// a repeated closing vertex.
//
// Any failure aborts the calling test via t.Fatal.
func setupGeoJsonPolygonS2LoopPortingIssue(t *testing.T) index.Index {
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	if err = i.Open(); err != nil {
		t.Fatal(err)
	}

	largePolygon := [][][][]float64{{{
		{-135.0, -38.0},
		{149.0, -38.0},
		{149.0, 77.0},
		{-135.0, 77.0},
	}}}
	doc := document.NewDocument("polygon1")
	doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
		largePolygon, "polygon", document.DefaultGeoShapeIndexingOptions))
	if err = i.Update(doc); err != nil {
		t.Fatal(err)
	}
	return i
}
// TestGeoJsonPolygonContainsQueryS2LoopPortingIssue runs "contains" queries
// with two small polygons against the index built by
// setupGeoJsonPolygonS2LoopPortingIssue and expects the single large indexed
// polygon ("polygon1") to be reported for both.
func TestGeoJsonPolygonContainsQueryS2LoopPortingIssue(t *testing.T) {
	tests := []struct {
		polygon [][][]float64
		field   string
		want    []string
	}{
		// test containment query polygon for polygon1.
		{
			[][][]float64{{
				{13.007812500000002, 37.99616267972809},
				{13.559375000000002, 37.99616267972809},
				{13.559375000000002, 38.472819658516866},
				{13.007812500000002, 38.472819658516866},
			}},
			"geometry",
			[]string{"polygon1"},
		},
		// test containment query polygon for polygon1.
		{
			[][][]float64{{
				{13.007812500000002, 37.99616267972809},
				{13.359375000000002, 37.99616267972809},
				{13.359375000000002, 38.272819658516866},
				{13.007812500000002, 38.272819658516866},
			}},
			"geometry",
			[]string{"polygon1"},
		},
	}

	i := setupGeoJsonPolygonS2LoopPortingIssue(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: without a reader every statement below
		// would dereference nil.
		t.Fatal(err)
	}
	defer func() {
		if cerr := indexReader.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePolygonQueryWithRelation("contains",
			indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for polygon: %+v",
				n, test.want, got, test.polygon)
		}
	}
}
// TestGeoJsonPolygonIntersectsQuery1 runs "intersects" polygon queries
// against the two-triangle index built by
// setupGeoJsonShapesIndexForPolygonQuery1 and compares the returned external
// document IDs with each table entry.
func TestGeoJsonPolygonIntersectsQuery1(t *testing.T) {
	tests := []struct {
		polygon [][][]float64
		field   string
		want    []string
	}{
		// test non-intersecting query polygon.
		{[][][]float64{{
			{
				97.745361328125,
				68.21644657802169,
			},
			{
				97.701416015625,
				67.97051353559428,
			},
			{
				97.80029296875,
				67.97875365614591,
			},
			{
				97.745361328125,
				68.21644657802169,
			},
		}}, "geometry", nil},
		// test a second query polygon that expects no hits: it lies far
		// away from both polygons in this index.
		{[][][]float64{{
			{
				77.59214401245117,
				12.966043458314124,
			},
			{
				77.58853912353516,
				12.95232574618635,
			},
			{
				77.60943889617919,
				12.956466232826733,
			},
			{
				77.59214401245117,
				12.966043458314124,
			},
		}}, "geometry", nil},
		// test intersecting query polygon for polygon1.
		{[][][]float64{{
			{97.0806884765625, 61.61423180712503},
			{96.7510986328125, 61.54625879879804},
			{97.305908203125, 61.367777577924},
			{97.0806884765625, 61.61423180712503},
		}}, "geometry", []string{"polygon1"}},
	}

	i := setupGeoJsonShapesIndexForPolygonQuery1(t)
	indexReader, err := i.Reader()
	if err != nil {
		// Fatal rather than Error: without a reader every statement below
		// would dereference nil.
		t.Fatal(err)
	}
	defer func() {
		if cerr := indexReader.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	for n, test := range tests {
		got, err := runGeoShapePolygonQueryWithRelation("intersects",
			indexReader, test.polygon, test.field)
		if err != nil {
			t.Fatal(err)
		}
		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("test %d, expected %v, got %v for polygon: %+v", n,
				test.want, got, test.polygon)
		}
	}
}
// setupGeoJsonShapesIndexForPolygonQuery1 creates and opens a scorch index
// (empty "path", "s2" spatial plugin) and stores two documents, "polygon1"
// and "polygon2", each with a small triangle in the "geometry" field.
//
// Any failure aborts the calling test via t.Fatal.
func setupGeoJsonShapesIndexForPolygonQuery1(t *testing.T) index.Index {
	t.Helper()

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := scorch.NewScorch(
		gtreap.Name,
		map[string]interface{}{
			"path":          "",
			"spatialPlugin": "s2",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	if err = i.Open(); err != nil {
		t.Fatal(err)
	}

	// addPolygon indexes one document whose "geometry" field holds the given
	// polygon coordinates.
	addPolygon := func(docID string, coordinates [][][][]float64) {
		t.Helper()
		doc := document.NewDocument(docID)
		doc.AddField(document.NewGeoShapeFieldWithIndexingOptions("geometry", []uint64{},
			coordinates, "polygon", document.DefaultGeoShapeIndexingOptions))
		if uerr := i.Update(doc); uerr != nil {
			t.Fatal(uerr)
		}
	}

	addPolygon("polygon1", [][][][]float64{{{
		{96.69202458735312, 61.59480859768306},
		{96.79202458735311, 61.39480859768306},
		{96.79202458735311, 61.59480859768306},
		{96.69202458735312, 61.59480859768306},
	}}})

	addPolygon("polygon2", [][][][]float64{{{
		{91.35604953911839, 65.11164029408492},
		{91.45604953911838, 64.91164029408492},
		{91.45604953911838, 65.11164029408492},
		{91.35604953911839, 65.11164029408492},
	}}})

	return i
}
================================================
FILE: search/searcher/search_ip_range.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"net"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// netLimits returns the lowest (lo) and highest (hi) address contained in
// network n. Both results are always in 16-byte form; IPv4 networks are
// expressed as IPv4-mapped IPv6 addresses, so their String() form still
// prints as dotted-quad.
//
// The address is normalised with To16 regardless of the mask width, so an
// IPNet that pairs a 4-byte IP with a 16-byte mask (or the reverse) is
// handled instead of indexing past the end of the shorter slice.
func netLimits(n *net.IPNet) (lo net.IP, hi net.IP) {
	ones, bits := n.Mask.Size()
	netNum := n.IP.To16()
	if bits == net.IPv4len*8 {
		// An IPv4 mask covers only the last 4 bytes of the 16-byte form;
		// the 96-bit IPv4-mapped prefix is always part of the network.
		ones += 8 * (net.IPv6len - net.IPv4len)
	}
	mask := net.CIDRMask(ones, 8*net.IPv6len)
	lo = make(net.IP, net.IPv6len)
	hi = make(net.IP, net.IPv6len)
	for i := 0; i < net.IPv6len; i++ {
		lo[i] = netNum[i] & mask[i] // clear host bits
		hi[i] = lo[i] | ^mask[i]    // set host bits
	}
	return lo, hi
}
// NewIPRangeSearcher returns a searcher matching documents whose field holds
// an IP term inside the network ipNet. It walks the field dictionary between
// the network's lowest and highest address, collects every term found and
// hands them to NewMultiTermSearcher. If the number of collected terms trips
// tooManyClauses, the corresponding error is returned instead.
func NewIPRangeSearcher(ctx context.Context, indexReader index.IndexReader, ipNet *net.IPNet,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	first, last := netLimits(ipNet)
	dict, err := indexReader.FieldDictRange(field, first, last)
	if err != nil {
		return nil, err
	}
	defer dict.Close()

	var terms []string
	for {
		entry, nextErr := dict.Next()
		if nextErr != nil {
			return nil, nextErr
		}
		if entry == nil {
			// dictionary exhausted
			break
		}
		terms = append(terms, entry.Term)
		if n := len(terms); tooManyClauses(n) {
			return nil, tooManyClausesErr(field, n)
		}
	}

	return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
}
================================================
FILE: search/searcher/search_ip_range_test.go
================================================
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"net"
"testing"
)
// Test_netLimits checks, for a set of IPv4 and IPv6 CIDR strings, that
// netLimits yields the expected first and last address of the network.
func Test_netLimits(t *testing.T) {
	type limitsCase struct {
		arg string
		lo  string
		hi  string
	}
	cases := []limitsCase{
		{"128.0.0.0/1", "128.0.0.0", "255.255.255.255"},
		{"128.0.0.0/7", "128.0.0.0", "129.255.255.255"},
		{"1.1.1.1/8", "1.0.0.0", "1.255.255.255"},
		{"1.2.3.0/24", "1.2.3.0", "1.2.3.255"},
		{"1.2.2.0/23", "1.2.2.0", "1.2.3.255"},
		{"1.2.3.128/25", "1.2.3.128", "1.2.3.255"},
		{"1.2.3.0/25", "1.2.3.0", "1.2.3.127"},
		{"1.2.3.4/31", "1.2.3.4", "1.2.3.5"},
		{"1.2.3.4/32", "1.2.3.4", "1.2.3.4"},
		{"2a00:23c8:7283:ff00:1fa8:0:0:0/80", "2a00:23c8:7283:ff00:1fa8::", "2a00:23c8:7283:ff00:1fa8:ffff:ffff:ffff"},
	}
	for _, tc := range cases {
		t.Run(tc.arg, func(t *testing.T) {
			// Named ipNet so the local does not hide the net package.
			_, ipNet, err := net.ParseCIDR(tc.arg)
			if err != nil {
				t.Fatal(err)
			}
			gotLo, gotHi := netLimits(ipNet)
			if gotLo.String() == tc.lo && gotHi.String() == tc.hi {
				return
			}
			t.Errorf("netLimits(%q) = %s %s, want %s %s", tc.arg, gotLo, gotHi, tc.lo, tc.hi)
		})
	}
}
================================================
FILE: search/searcher/search_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package searcher
import (
"context"
"encoding/json"
"reflect"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeKNNSearcher holds the size in bytes of a KNNSearcher
// value as reported by reflect; it is computed once at package init and used
// by (*KNNSearcher).Size.
var reflectStaticSizeKNNSearcher int

func init() {
	reflectStaticSizeKNNSearcher = int(reflect.TypeOf(KNNSearcher{}).Size())
}
// KNNSearcher is a search.Searcher that serves k-nearest-neighbour vector
// matches obtained from an index.VectorReader and scores them with a
// scorer.KNNQueryScorer.
type KNNSearcher struct {
// field, vector and k are the query parameters passed to NewKNNSearcher.
field string
vector []float32
k int64
// indexReader is the reader the searcher was created from.
indexReader index.IndexReader
// vectorReader yields the matching vector documents (Next/Advance).
vectorReader index.VectorReader
// scorer turns a vector match into a search.DocumentMatch.
scorer *scorer.KNNQueryScorer
// NOTE(review): count is neither set by NewKNNSearcher nor read by the
// methods visible here (Count delegates to vectorReader) — confirm whether
// it is still needed.
count uint64
// vd is a reusable buffer; it is Reset and handed to vectorReader on every
// Next/Advance call.
vd index.VectorDoc
}
// NewKNNSearcher builds a KNNSearcher for the given field, query vector and
// k. The index reader must implement index.VectorIndexReader; when it does
// not, the function returns (nil, nil), so callers have to check the
// returned searcher for nil as well as the error.
func NewKNNSearcher(ctx context.Context, i index.IndexReader, m mapping.IndexMapping,
	options search.SearcherOptions, field string, vector []float32, k int64,
	boost float64, similarityMetric string, searchParams json.RawMessage,
	eligibleSelector index.EligibleDocumentSelector) (
	search.Searcher, error) {
	vectorIndex, supported := i.(index.VectorIndexReader)
	if !supported {
		return nil, nil
	}

	reader, err := vectorIndex.VectorReader(ctx, vector, field, k, searchParams, eligibleSelector)
	if err != nil {
		return nil, err
	}

	searcher := &KNNSearcher{
		indexReader:  i,
		vectorReader: reader,
		field:        field,
		vector:       vector,
		k:            k,
		scorer: scorer.NewKNNQueryScorer(vector, field, boost,
			options, similarityMetric),
	}
	return searcher, nil
}
// VectorOptimize forwards the optimization request to the underlying vector
// reader when that reader implements index.VectorOptimizable; otherwise it
// returns (nil, nil).
func (s *KNNSearcher) VectorOptimize(ctx context.Context, octx index.VectorOptimizableContext) (
	index.VectorOptimizableContext, error) {
	optimizable, supported := s.vectorReader.(index.VectorOptimizable)
	if !supported {
		return nil, nil
	}
	return optimizable.VectorOptimize(ctx, octx)
}
// Advance asks the vector reader to advance to ID and returns the scored
// match. It returns (nil, nil) when the reader has no further match and
// (nil, err) when the reader fails.
func (s *KNNSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (
	*search.DocumentMatch, error) {
	hit, err := s.vectorReader.Advance(ID, s.vd.Reset())
	if err != nil || hit == nil {
		// err is nil in the "no more matches" case.
		return nil, err
	}
	return s.scorer.Score(ctx, hit), nil
}
// Close closes the underlying vector reader and returns its error, if any.
func (s *KNNSearcher) Close() error {
return s.vectorReader.Close()
}
// Count returns the count reported by the underlying vector reader.
func (s *KNNSearcher) Count() uint64 {
return s.vectorReader.Count()
}
// DocumentMatchPoolSize always reports 1 for this searcher.
func (s *KNNSearcher) DocumentMatchPoolSize() int {
return 1
}
// Min always reports 0 for this searcher.
func (s *KNNSearcher) Min() int {
return 0
}
// Next fetches the next match from the vector reader and returns it scored.
// It returns (nil, nil) once the reader is exhausted and (nil, err) when the
// reader fails.
func (s *KNNSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	hit, err := s.vectorReader.Next(s.vd.Reset())
	if err != nil || hit == nil {
		// err is nil in the "exhausted" case.
		return nil, err
	}
	return s.scorer.Score(ctx, hit), nil
}
// SetQueryNorm passes the query normalization factor on to the scorer.
func (s *KNNSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Size reports the memory footprint of the searcher: the static struct size
// plus one pointer, plus the sizes reported by the vector reader, the
// reusable vector document and the scorer.
func (s *KNNSearcher) Size() int {
	total := reflectStaticSizeKNNSearcher + size.SizeOfPtr
	total += s.vectorReader.Size()
	total += s.vd.Size()
	total += s.scorer.Size()
	return total
}
// Weight returns the weight reported by the scorer.
func (s *KNNSearcher) Weight() float64 {
return s.scorer.Weight()
}
================================================
FILE: search/searcher/search_match_all.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeMatchAllSearcher holds the static (shallow) size in bytes
// of a MatchAllSearcher value, computed once at package initialization.
var reflectStaticSizeMatchAllSearcher int

func init() {
	reflectStaticSizeMatchAllSearcher = int(reflect.TypeOf(MatchAllSearcher{}).Size())
}
// MatchAllSearcher iterates over every document ID exposed by an index reader
// and gives each one a constant score.
type MatchAllSearcher struct {
// indexReader is the reader the searcher was created from.
indexReader index.IndexReader
// reader iterates over all document IDs (obtained via DocIDReaderAll).
reader index.DocIDReader
// scorer produces the constant score for every match.
scorer *scorer.ConstantScorer
// count is the document count captured when the searcher was constructed.
count uint64
}
// NewMatchAllSearcher creates a searcher that matches every document in
// indexReader. Each match is scored by a constant scorer built with a
// constant of 1.0 and the given boost.
//
// The document count is captured at construction time. If reading it fails,
// the already-opened doc ID reader is closed before the error is returned.
func NewMatchAllSearcher(ctx context.Context, indexReader index.IndexReader, boost float64, options search.SearcherOptions) (*MatchAllSearcher, error) {
	allDocs, err := indexReader.DocIDReaderAll()
	if err != nil {
		return nil, err
	}

	docCount, err := indexReader.DocCount()
	if err != nil {
		// best-effort cleanup; the DocCount error is the one worth reporting
		_ = allDocs.Close()
		return nil, err
	}

	rv := &MatchAllSearcher{
		indexReader: indexReader,
		reader:      allDocs,
		scorer:      scorer.NewConstantScorer(1.0, boost, options),
		count:       docCount,
	}
	return rv, nil
}
// Size estimates the memory footprint of the searcher: the static struct size
// plus one pointer, plus the sizes reported by the doc ID reader and the
// scorer.
func (s *MatchAllSearcher) Size() int {
	total := reflectStaticSizeMatchAllSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the document count captured when the searcher was created.
func (s *MatchAllSearcher) Count() uint64 {
return s.count
}
// Weight returns the weight reported by the constant scorer.
func (s *MatchAllSearcher) Weight() float64 {
return s.scorer.Weight()
}
// SetQueryNorm passes the query normalization factor on to the scorer.
func (s *MatchAllSearcher) SetQueryNorm(qnorm float64) {
s.scorer.SetQueryNorm(qnorm)
}
// Next returns the scored match for the next document ID, or (nil, nil) once
// the doc ID reader is exhausted.
func (s *MatchAllSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	docID, err := s.reader.Next()
	if err != nil || docID == nil {
		return nil, err
	}
	return s.scorer.Score(ctx, docID), nil
}
// Advance asks the doc ID reader to advance to ID and returns the scored match
// for the document ID it yields, or (nil, nil) when the reader has none left.
func (s *MatchAllSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	docID, err := s.reader.Advance(ID)
	if err != nil || docID == nil {
		return nil, err
	}
	return s.scorer.Score(ctx, docID), nil
}
// Close closes the underlying doc ID reader and returns its error, if any.
func (s *MatchAllSearcher) Close() error {
return s.reader.Close()
}
// Min always returns 0.
// NOTE(review): presumably the minimum number of sub-clauses that must match,
// which would not apply to a leaf searcher; confirm against search.Searcher.
func (s *MatchAllSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 1.
// NOTE(review): presumably the number of DocumentMatch instances this
// searcher needs from the pool at any one time; confirm against the
// search.Searcher contract.
func (s *MatchAllSearcher) DocumentMatchPoolSize() int {
return 1
}
================================================
FILE: search/searcher/search_match_all_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestMatchAllSearch verifies that a MatchAllSearcher returns every document
// of the shared two-doc test index, in order, with the expected score, both
// with the default query norm and with a boost that is cancelled out by an
// explicit query norm.
func TestMatchAllSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// without a reader nothing below can run; continuing would only
		// dereference a nil reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	explainTrue := search.SearcherOptions{Explain: true}

	allSearcher, err := NewMatchAllSearcher(context.TODO(), twoDocIndexReader, 1.0, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	allSearcher2, err := NewMatchAllSearcher(context.TODO(), twoDocIndexReader, 1.2, explainTrue)
	if err != nil {
		t.Fatal(err)
	}

	// both searchers are expected to yield docs "1".."5", each with score 1.0
	allDocs := func() []*search.DocumentMatch {
		ids := []string{"1", "2", "3", "4", "5"}
		rv := make([]*search.DocumentMatch, 0, len(ids))
		for _, id := range ids {
			rv = append(rv, &search.DocumentMatch{
				IndexInternalID: index.IndexInternalID(id),
				Score:           1.0,
			})
		}
		return rv
	}

	tests := []struct {
		searcher  search.Searcher
		queryNorm float64
		results   []*search.DocumentMatch
	}{
		{
			searcher:  allSearcher,
			queryNorm: 1.0,
			results:   allDocs(),
		},
		{
			// boost 1.2 * queryNorm 0.8333333 ~= 1.0
			searcher:  allSearcher2,
			queryNorm: 0.8333333,
			results:   allDocs(),
		},
	}

	for testIndex, test := range tests {
		if test.queryNorm != 1.0 {
			test.searcher.SetQueryNorm(test.queryNorm)
		}
		// bind the searcher explicitly so every deferred call closes its own
		// searcher regardless of the toolchain's loop-variable semantics
		searcher := test.searcher
		defer func() {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}()

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
================================================
FILE: search/searcher/search_match_none.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeMatchNoneSearcher holds the static (shallow) size in bytes
// of a MatchNoneSearcher value, computed once at package initialization.
var reflectStaticSizeMatchNoneSearcher int

func init() {
	reflectStaticSizeMatchNoneSearcher = int(reflect.TypeOf(MatchNoneSearcher{}).Size())
}
// MatchNoneSearcher is a searcher that never produces a match.
type MatchNoneSearcher struct {
// indexReader is the reader the searcher was created with; none of the
// methods defined alongside this type read from it.
indexReader index.IndexReader
}
// NewMatchNoneSearcher creates a searcher that matches no documents. The
// returned error is always nil.
func NewMatchNoneSearcher(indexReader index.IndexReader) (*MatchNoneSearcher, error) {
	rv := &MatchNoneSearcher{indexReader: indexReader}
	return rv, nil
}
// Size estimates the memory footprint of the searcher as the static struct
// size plus one pointer.
func (s *MatchNoneSearcher) Size() int {
return reflectStaticSizeMatchNoneSearcher + size.SizeOfPtr
}
// Count always returns 0, since this searcher never matches a document.
func (s *MatchNoneSearcher) Count() uint64 {
return uint64(0)
}
// Weight always returns 0.0.
func (s *MatchNoneSearcher) Weight() float64 {
return 0.0
}
// SetQueryNorm is a no-op; the supplied norm is ignored.
func (s *MatchNoneSearcher) SetQueryNorm(qnorm float64) {
}
// Next always returns (nil, nil): there is never a next match.
func (s *MatchNoneSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
return nil, nil
}
// Advance always returns (nil, nil), regardless of the requested ID.
func (s *MatchNoneSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
return nil, nil
}
// Close does nothing and always returns nil.
func (s *MatchNoneSearcher) Close() error {
return nil
}
// Min always returns 0.
// NOTE(review): presumably the minimum number of sub-clauses that must match,
// which would not apply to a leaf searcher; confirm against search.Searcher.
func (s *MatchNoneSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize always returns 0; this searcher never produces a
// DocumentMatch.
func (s *MatchNoneSearcher) DocumentMatchPoolSize() int {
return 0
}
================================================
FILE: search/searcher/search_match_none_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"testing"
"github.com/blevesearch/bleve/v2/search"
)
// TestMatchNoneSearch verifies that a MatchNoneSearcher yields no results when
// iterated over the shared two-doc test index.
func TestMatchNoneSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// without a reader nothing below can run; continuing would only
		// dereference a nil reader
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	noneSearcher, err := NewMatchNoneSearcher(twoDocIndexReader)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher search.Searcher
		results  []*search.DocumentMatch
	}{
		{
			searcher: noneSearcher,
			results:  []*search.DocumentMatch{},
		},
	}

	for testIndex, test := range tests {
		// bind the searcher explicitly so every deferred call closes its own
		// searcher regardless of the toolchain's loop-variable semantics
		searcher := test.searcher
		defer func() {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}()

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		i := 0
		for err == nil && next != nil {
			if i < len(test.results) {
				if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
					t.Errorf("expected result %d to have id %s got %s for test %d", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
				}
				if !scoresCloseEnough(next.Score, test.results[i].Score) {
					t.Errorf("expected result %d to have score %v got %v for test %d", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s", next.Expl)
				}
			}
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
================================================
FILE: search/searcher/search_multi_term.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewMultiTermSearcher builds a disjunction over one term searcher per entry
// in terms, all against the same field.
//
// When the number of terms exceeds the clause limit, the optimized
// (unadorned) path is used if the options allow it. Otherwise a
// too-many-clauses error is returned when limit is true. With limit false the
// regular disjunction is built regardless of size.
func NewMultiTermSearcher(ctx context.Context, indexReader index.IndexReader, terms []string,
	field string, boost float64, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	if tooManyClauses(len(terms)) {
		switch {
		case optionsDisjunctionOptimizable(options):
			return optimizeMultiTermSearcher(ctx, indexReader, terms, field, boost, options)
		case limit:
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	termSearchers, err := makeBatchSearchers(ctx, indexReader, terms, field, boost, options)
	if err != nil {
		return nil, err
	}

	// combine the per-term searchers into a single disjunction
	return newMultiTermSearcherInternal(ctx, indexReader, termSearchers, field, boost,
		options, limit)
}
// NewMultiTermSearcherBoosted works like NewMultiTermSearcher, but additionally
// scales the boost of each individual term according to its edit distance
// from the query term (editDistances[i] belongs to terms[i]).
//
// The per-term scaling only applies to the regular disjunction path; the
// optimized path taken for oversized term lists does not receive
// editDistances.
func NewMultiTermSearcherBoosted(ctx context.Context, indexReader index.IndexReader, terms []string,
	field string, boost float64, editDistances []uint8, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	if tooManyClauses(len(terms)) {
		switch {
		case optionsDisjunctionOptimizable(options):
			return optimizeMultiTermSearcher(ctx, indexReader, terms, field, boost, options)
		case limit:
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	termSearchers, err := makeBatchSearchersBoosted(ctx, indexReader, terms, field, boost, editDistances, options)
	if err != nil {
		return nil, err
	}

	// combine the per-term searchers into a single disjunction
	return newMultiTermSearcherInternal(ctx, indexReader, termSearchers, field, boost,
		options, limit)
}
// NewMultiTermSearcherBytes is the []byte-term counterpart of
// NewMultiTermSearcher: it builds a disjunction over one term searcher per
// entry in terms.
//
// When the number of terms exceeds the clause limit, the optimized
// (unadorned) path is used if the options allow it. Otherwise a
// too-many-clauses error is returned when limit is true.
func NewMultiTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, terms [][]byte,
	field string, boost float64, options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	if tooManyClauses(len(terms)) {
		switch {
		case optionsDisjunctionOptimizable(options):
			return optimizeMultiTermSearcherBytes(ctx, indexReader, terms, field, boost, options)
		case limit:
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	termSearchers, err := makeBatchSearchersBytes(ctx, indexReader, terms, field, boost, options)
	if err != nil {
		return nil, err
	}

	// combine the per-term searchers into a single disjunction
	return newMultiTermSearcherInternal(ctx, indexReader, termSearchers, field, boost,
		options, limit)
}
// newMultiTermSearcherInternal wraps the given searchers in a disjunction
// searcher with a minimum of 0. If the disjunction cannot be built, every
// supplied searcher is closed before the error is returned. The field and
// boost arguments are not used here.
func newMultiTermSearcherInternal(ctx context.Context, indexReader index.IndexReader,
	searchers []search.Searcher, field string, boost float64,
	options search.SearcherOptions, limit bool) (
	search.Searcher, error) {
	disjunction, err := newDisjunctionSearcher(ctx, indexReader, searchers, 0, options,
		limit)
	if err == nil {
		return disjunction, nil
	}

	// the caller handed ownership of the searchers to us; release them
	for idx := range searchers {
		_ = searchers[idx].Close()
	}
	return nil, err
}
// optimizeMultiTermSearcher folds an arbitrarily long list of terms into a
// single optimized "disjunction:unadorned" searcher.
//
// The terms are processed in batches of at most DisjunctionMaxClauseCount.
// Each batch of term searchers, together with the searcher produced from the
// previous batches, is handed to optimizeCompositeSearcher, and the result
// becomes the accumulated searcher for the next round.
//
// It returns an error if any term searcher cannot be built, if the optimizer
// fails, or if the optimizer yields no searcher ("unable to optimize"). All
// intermediate searchers are closed on every path, including the searcher
// accumulated from earlier batches when a later batch fails to build.
func optimizeMultiTermSearcher(ctx context.Context, indexReader index.IndexReader, terms []string,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	var finalSearcher search.Searcher
	for len(terms) > 0 {
		// carve off the next batch
		batchTerms := terms
		if len(batchTerms) > DisjunctionMaxClauseCount {
			batchTerms = terms[:DisjunctionMaxClauseCount]
		}
		terms = terms[len(batchTerms):]

		batch, err := makeBatchSearchers(ctx, indexReader, batchTerms, field, boost, options)
		if err != nil {
			// the searcher accumulated from earlier batches is owned by this
			// function and would otherwise leak
			if finalSearcher != nil {
				_ = finalSearcher.Close()
			}
			return nil, err
		}
		if finalSearcher != nil {
			batch = append(batch, finalSearcher)
		}

		finalSearcher, err = optimizeCompositeSearcher(ctx, "disjunction:unadorned",
			indexReader, batch, options)
		// all searchers in batch should be closed, regardless of error or optimization failure
		// either we're returning, or continuing and only finalSearcher is needed for next loop
		for _, searcher := range batch {
			if searcher != nil {
				_ = searcher.Close()
			}
		}
		if err != nil {
			return nil, err
		}
		if finalSearcher == nil {
			return nil, fmt.Errorf("unable to optimize")
		}
	}
	return finalSearcher, nil
}
// makeBatchSearchers creates one term searcher per term, in order. If any
// searcher fails to build, every searcher created so far is closed and the
// error is returned.
func makeBatchSearchers(ctx context.Context, indexReader index.IndexReader, terms []string, field string,
	boost float64, options search.SearcherOptions) ([]search.Searcher, error) {
	batch := make([]search.Searcher, len(terms))
	for idx, term := range terms {
		ts, err := NewTermSearcher(ctx, indexReader, term, field, boost, options)
		batch[idx] = ts
		if err == nil {
			continue
		}
		// release whatever has been opened before bailing out
		for _, opened := range batch {
			if opened != nil {
				_ = opened.Close()
			}
		}
		return nil, err
	}
	return batch, nil
}
// makeBatchSearchersBoosted creates one term searcher per term, scaling the
// boost of terms[i] by 1/(editDistances[i]+1) so that closer matches weigh
// more.
//
// A term without a recorded edit distance (editDistances nil or shorter than
// terms) keeps the caller's boost unchanged instead of being zeroed out or
// causing an index-out-of-range panic. If any searcher fails to build, every
// searcher created so far is closed and the error is returned.
func makeBatchSearchersBoosted(ctx context.Context, indexReader index.IndexReader, terms []string, field string,
	boost float64, editDistances []uint8, options search.SearcherOptions) ([]search.Searcher, error) {
	qsearchers := make([]search.Searcher, len(terms))
	qsearchersClose := func() {
		for _, searcher := range qsearchers {
			if searcher != nil {
				_ = searcher.Close()
			}
		}
	}
	for i, term := range terms {
		editMultiplier := 1.0
		if i < len(editDistances) {
			// convert before adding so a distance of 255 cannot wrap the
			// uint8 to 0 and divide by zero
			editMultiplier = 1 / (float64(editDistances[i]) + 1)
		}
		var err error
		qsearchers[i], err = NewTermSearcher(ctx, indexReader, term, field, boost*editMultiplier, options)
		if err != nil {
			qsearchersClose()
			return nil, err
		}
	}
	return qsearchers, nil
}
// optimizeMultiTermSearcherBytes is the []byte-term counterpart of
// optimizeMultiTermSearcher: it folds an arbitrarily long list of terms into
// a single optimized "disjunction:unadorned" searcher.
//
// The terms are processed in batches of at most DisjunctionMaxClauseCount.
// Each batch of term searchers, together with the searcher produced from the
// previous batches, is handed to optimizeCompositeSearcher, and the result
// becomes the accumulated searcher for the next round.
//
// It returns an error if any term searcher cannot be built, if the optimizer
// fails, or if the optimizer yields no searcher ("unable to optimize"). All
// intermediate searchers are closed on every path, including the searcher
// accumulated from earlier batches when a later batch fails to build.
func optimizeMultiTermSearcherBytes(ctx context.Context, indexReader index.IndexReader, terms [][]byte,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	var finalSearcher search.Searcher
	for len(terms) > 0 {
		// carve off the next batch
		batchTerms := terms
		if len(batchTerms) > DisjunctionMaxClauseCount {
			batchTerms = terms[:DisjunctionMaxClauseCount]
		}
		terms = terms[len(batchTerms):]

		batch, err := makeBatchSearchersBytes(ctx, indexReader, batchTerms, field, boost, options)
		if err != nil {
			// the searcher accumulated from earlier batches is owned by this
			// function and would otherwise leak
			if finalSearcher != nil {
				_ = finalSearcher.Close()
			}
			return nil, err
		}
		if finalSearcher != nil {
			batch = append(batch, finalSearcher)
		}

		finalSearcher, err = optimizeCompositeSearcher(ctx, "disjunction:unadorned",
			indexReader, batch, options)
		// all searchers in batch should be closed, regardless of error or optimization failure
		// either we're returning, or continuing and only finalSearcher is needed for next loop
		for _, searcher := range batch {
			if searcher != nil {
				_ = searcher.Close()
			}
		}
		if err != nil {
			return nil, err
		}
		if finalSearcher == nil {
			return nil, fmt.Errorf("unable to optimize")
		}
	}
	return finalSearcher, nil
}
// makeBatchSearchersBytes creates one term searcher per []byte term, in order.
// If any searcher fails to build, every searcher created so far is closed and
// the error is returned.
func makeBatchSearchersBytes(ctx context.Context, indexReader index.IndexReader, terms [][]byte, field string,
	boost float64, options search.SearcherOptions) ([]search.Searcher, error) {
	batch := make([]search.Searcher, len(terms))
	for idx, term := range terms {
		ts, err := NewTermSearcherBytes(ctx, indexReader, term, field, boost, options)
		batch[idx] = ts
		if err == nil {
			continue
		}
		// release whatever has been opened before bailing out
		for _, opened := range batch {
			if opened != nil {
				_ = opened.Close()
			}
		}
		return nil, err
	}
	return batch, nil
}
================================================
FILE: search/searcher/search_numeric_range.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"bytes"
"context"
"math"
"sort"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewNumericRangeSearcher builds a searcher matching documents whose numeric
// field value lies between min and max.
//
// A nil min or max means unbounded (-Inf / +Inf). A nil inclusiveMin defaults
// to true and a nil inclusiveMax defaults to false. The range is split into
// prefix-coded term ranges, the candidate terms are narrowed down to those
// present in the field dictionary, and a multi-term searcher is built over
// the survivors. A too-many-clauses error is returned when the surviving term
// count exceeds the clause limit.
func NewNumericRangeSearcher(ctx context.Context, indexReader index.IndexReader,
	min *float64, max *float64, inclusiveMin, inclusiveMax *bool, field string,
	boost float64, options search.SearcherOptions) (search.Searcher, error) {
	// resolve unbounded edges and default inclusivity
	lower, upper := math.Inf(-1), math.Inf(1)
	if min != nil {
		lower = *min
	}
	if max != nil {
		upper = *max
	}
	includeLower, includeUpper := true, false
	if inclusiveMin != nil {
		includeLower = *inclusiveMin
	}
	if inclusiveMax != nil {
		includeUpper = *inclusiveMax
	}

	// translate to sortable int64 bounds, tightening exclusive edges without
	// stepping past the int64 limits
	minInt64 := numeric.Float64ToInt64(lower)
	if !includeLower && minInt64 != math.MaxInt64 {
		minInt64++
	}
	maxInt64 := numeric.Float64ToInt64(upper)
	if !includeUpper && maxInt64 != math.MinInt64 {
		maxInt64--
	}

	var (
		fieldDict     index.FieldDictContains
		dictBytesRead uint64
		isIndexed     filterFunc
		err           error
	)
	if irr, ok := indexReader.(index.IndexReaderContains); ok {
		fieldDict, err = irr.FieldDictContains(field)
		if err != nil {
			return nil, err
		}
		isIndexed = func(term []byte) bool {
			found, err := fieldDict.Contains(term)
			return err == nil && found
		}
		dictBytesRead = fieldDict.BytesRead()
	}

	// FIXME hard-coded precision, should match field declaration
	terms := splitInt64Range(minInt64, maxInt64, 4).Enumerate(isIndexed)

	if fieldDict != nil {
		if fd, ok := fieldDict.(index.FieldDict); ok {
			if err = fd.Close(); err != nil {
				return nil, err
			}
		}
	}

	if len(terms) > 0 {
		// for upside_down
		if isIndexed == nil {
			terms, err = filterCandidateTerms(indexReader, terms, field)
			if err != nil {
				return nil, err
			}
		}
		if tooManyClauses(len(terms)) {
			return nil, tooManyClausesErr(field, len(terms))
		}
	}

	// reporting back the IO stats with respect to the dictionary
	// loaded, using the context
	if ctx != nil {
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	// an empty term list still goes through the multi-term searcher:
	// cannot return MatchNoneSearcher because of interaction with
	// commit f391b991c20f02681bacd197afc6d8aed444e132
	return NewMultiTermSearcherBytes(ctx, indexReader, terms, field,
		boost, options, true)
}
// filterCandidateTerms returns the subset of terms that actually occur in the
// field dictionary of indexReader.
//
// terms is indexed at its first and last element to bound the dictionary
// scan and is binary-searched with bytes.Compare, so it is expected to be
// sorted in ascending byte order. An empty terms slice yields (nil, nil)
// without touching the index. If iterating the dictionary fails, the error
// is returned. An error from closing the dictionary is reported only when no
// earlier error occurred.
func filterCandidateTerms(indexReader index.IndexReader,
	terms [][]byte, field string) (rv [][]byte, err error) {
	if len(terms) == 0 {
		// nothing to filter; avoids indexing terms[0] below
		return nil, nil
	}
	fieldDict, err := indexReader.FieldDictRange(field, terms[0], terms[len(terms)-1])
	if err != nil {
		return nil, err
	}

	// enumerate the terms and check against list of terms
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		termBytes := []byte(tfd.Term)
		i := sort.Search(len(terms), func(i int) bool { return bytes.Compare(terms[i], termBytes) >= 0 })
		if i < len(terms) && bytes.Equal(terms[i], termBytes) {
			rv = append(rv, terms[i])
		}
		// everything before i is smaller than the current dictionary term
		// and can never match a later one
		terms = terms[i:]
		tfd, err = fieldDict.Next()
	}
	if cerr := fieldDict.Close(); cerr != nil && err == nil {
		err = cerr
	}
	return rv, err
}
// termRange is an inclusive range of terms, from startTerm up to and
// including endTerm, compared byte-wise.
type termRange struct {
// startTerm is the first term of the range.
startTerm []byte
// endTerm is the last term of the range (inclusive).
endTerm []byte
}
// Enumerate lists every term from startTerm through endTerm (inclusive),
// stepping with incrementBytes. When filter is non-nil only the terms it
// accepts are kept. The result is nil when nothing is kept.
func (t *termRange) Enumerate(filter filterFunc) [][]byte {
	var kept [][]byte
	for cur := t.startTerm; bytes.Compare(cur, t.endTerm) <= 0; cur = incrementBytes(cur) {
		if filter == nil || filter(cur) {
			kept = append(kept, cur)
		}
	}
	return kept
}
// incrementBytes returns a copy of in with 1 added to it, treating the slice
// as a big-endian unsigned integer. The input is left untouched. A value
// consisting solely of 0xFF bytes wraps around to all zeros.
func incrementBytes(in []byte) []byte {
	out := make([]byte, len(in))
	copy(out, in)
	// start at the least-significant (last) byte and carry leftwards for as
	// long as the byte just incremented wrapped around to zero
	for pos := len(out) - 1; pos >= 0; pos-- {
		out[pos]++
		if out[pos] != 0 {
			return out
		}
	}
	return out
}
// termRanges is an ordered collection of term ranges.
type termRanges []*termRange

// Enumerate concatenates the enumeration of every range, in order, applying
// filter to each of them. The result is nil when no term is produced.
func (tr termRanges) Enumerate(filter filterFunc) [][]byte {
	var all [][]byte
	for idx := range tr {
		all = append(all, tr[idx].Enumerate(filter)...)
	}
	return all
}
// splitInt64Range decomposes the inclusive range [minBound, maxBound] into a
// list of term ranges at increasingly coarse precision. Each loop iteration
// handles the precisionStep-bit window that starts at bit position shift,
// beginning with the least-significant bits.
//
// An empty (non-nil) result is returned when minBound > maxBound.
// NOTE(review): this looks like a port of Lucene's numeric range splitting —
// confirm before relying on that correspondence.
func splitInt64Range(minBound, maxBound int64, precisionStep uint) termRanges {
rv := make(termRanges, 0)
if minBound > maxBound {
return rv
}
for shift := uint(0); ; shift += precisionStep {
// diff is one unit at the next (coarser) precision level
diff := int64(1) << (shift + precisionStep)
// mask selects the precisionStep bits of the current window
mask := ((int64(1) << precisionStep) - int64(1)) << shift
// hasLower: minBound has bits set inside the window, so it does not sit
// on a boundary of the next precision level
hasLower := (minBound & mask) != int64(0)
// hasUpper: maxBound does not have every window bit set, so it does not
// end on a boundary of the next precision level
hasUpper := (maxBound & mask) != mask
var nextMinBound int64
if hasLower {
// step up by one coarser unit, then clear the window bits
nextMinBound = (minBound + diff) &^ mask
} else {
nextMinBound = minBound &^ mask
}
var nextMaxBound int64
if hasUpper {
// step down by one coarser unit, then clear the window bits
nextMaxBound = (maxBound - diff) &^ mask
} else {
nextMaxBound = maxBound &^ mask
}
// a next bound that moved in the wrong direction means the +diff / -diff
// arithmetic overflowed int64
lowerWrapped := nextMinBound < minBound
upperWrapped := nextMaxBound > maxBound
if shift+precisionStep >= 64 || nextMinBound > nextMaxBound ||
lowerWrapped || upperWrapped {
// We are in the lowest precision or the next precision is not available.
rv = append(rv, newRange(minBound, maxBound, shift))
// exit the split recursion loop
break
}
if hasLower {
// partial block at the lower edge, covered at the current precision
rv = append(rv, newRange(minBound, minBound|mask, shift))
}
if hasUpper {
// partial block at the upper edge, covered at the current precision
rv = append(rv, newRange(maxBound&^mask, maxBound, shift))
}
// recurse to next precision
minBound = nextMinBound
maxBound = nextMaxBound
}
return rv
}
// newRange builds the term range for [minBound, maxBound] at the given shift.
// The low shift bits of maxBound are forced to 1 before both bounds are
// prefix-coded.
func newRange(minBound, maxBound int64, shift uint) *termRange {
	lowBits := (int64(1) << shift) - int64(1)
	startTerm := numeric.MustNewPrefixCodedInt64(minBound, shift)
	endTerm := numeric.MustNewPrefixCodedInt64(maxBound|lowBits, shift)
	return newRangeBytes(startTerm, endTerm)
}
// newRangeBytes wraps the two already-encoded terms in a termRange without
// copying them.
func newRangeBytes(minBytes, maxBytes []byte) *termRange {
	rv := new(termRange)
	rv.startTerm = minBytes
	rv.endTerm = maxBytes
	return rv
}
================================================
FILE: search/searcher/search_numeric_range_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/numeric"
)
// TestSplitRange checks that splitting the range [1.0, 5.0] with a precision
// step of 4 enumerates the expected number of candidate terms.
func TestSplitRange(t *testing.T) {
	const wantTerms = 135

	lower := numeric.Float64ToInt64(1.0)
	upper := numeric.Float64ToInt64(5.0)

	gotTerms := len(splitInt64Range(lower, upper, 4).Enumerate(nil))
	if gotTerms != wantTerms {
		t.Errorf("expected 135 terms, got %d", gotTerms)
	}
}
// TestIncrementBytes checks incrementBytes on a single byte, on a value
// without carry, and on a value whose last byte carries into the previous one.
func TestIncrementBytes(t *testing.T) {
	// each case is an {input, expected output} pair
	cases := [][2][]byte{
		{{0}, {1}},
		{{0, 0}, {0, 1}},
		{{0, 255}, {1, 0}},
	}

	for _, c := range cases {
		in, want := c[0], c[1]
		if got := incrementBytes(in); !reflect.DeepEqual(got, want) {
			t.Errorf("expected %#v, got %#v", want, got)
		}
	}
}
================================================
FILE: search/searcher/search_phrase.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizePhraseSearcher holds the static (shallow) size in bytes of
// a PhraseSearcher value, computed once at package initialization.
var reflectStaticSizePhraseSearcher int

func init() {
	reflectStaticSizePhraseSearcher = int(reflect.TypeOf(PhraseSearcher{}).Size())
}
// PhraseSearcher finds documents matching a phrase. It first narrows the
// candidates with mustSearcher and then checks each candidate against the
// phrase constraints.
type PhraseSearcher struct {
// mustSearcher is the conjunction of the per-position term searchers.
mustSearcher search.Searcher
// queryNorm is computed from the weight of mustSearcher at construction.
queryNorm float64
// currMust is the current candidate produced by mustSearcher, or nil when
// it is exhausted.
currMust *search.DocumentMatch
// terms holds, for every phrase position, the alternative terms accepted
// at that position.
terms [][]string
// NOTE(review): path, paths and locations look like scratch state for the
// phrase-match check — confirm against checkCurrMustMatch.
path phrasePath
paths []phrasePath
locations []search.Location
// initialized records whether the first candidate has been fetched.
initialized bool
// map a term to a list of fuzzy terms that match it
// (for non-fuzzy searches with synonyms configured, the constructor stores
// the term's synonyms here instead)
fuzzyTermMatches map[string][]string
}
// Size estimates the memory footprint of the searcher: the static struct size
// plus one pointer, the sizes reported by the must searcher and the current
// candidate (when present), and the slice/string overhead and byte length of
// every phrase term.
func (s *PhraseSearcher) Size() int {
	total := reflectStaticSizePhraseSearcher + size.SizeOfPtr

	if s.mustSearcher != nil {
		total += s.mustSearcher.Size()
	}
	if s.currMust != nil {
		total += s.currMust.Size()
	}

	// one slice header per position, one string header plus bytes per term
	total += len(s.terms) * size.SizeOfSlice
	for _, position := range s.terms {
		for _, term := range position {
			total += size.SizeOfString + len(term)
		}
	}
	return total
}
// NewPhraseSearcher builds a phrase searcher for a flat list of terms, one
// term per phrase position. It is a convenience wrapper around
// NewMultiPhraseSearcher.
func NewPhraseSearcher(ctx context.Context, indexReader index.IndexReader, terms []string,
	fuzziness int, autoFuzzy bool, field string, boost float64, options search.SearcherOptions) (*PhraseSearcher, error) {
	// every position accepts exactly one term
	perPosition := make([][]string, 0, len(terms))
	for _, term := range terms {
		perPosition = append(perPosition, []string{term})
	}
	return NewMultiPhraseSearcher(ctx, indexReader, perPosition, fuzziness, autoFuzzy, field, boost, options)
}
// NewMultiPhraseSearcher builds a phrase searcher in which every phrase
// position may accept several alternative terms (terms[i] lists the
// alternatives for position i).
//
// Term vectors are always requested, since phrase matching needs positions.
// Empty terms are skipped; a position with no usable term contributes no
// searcher. A position with a single term gets a term (or fuzzy) searcher,
// and a position with several terms gets a disjunction over them. All
// positions are then combined in a conjunction.
//
// Fuzziness is enabled when autoFuzzy is true or fuzziness > 0, and autoFuzzy
// takes precedence when both are set. With fuzziness enabled, the fuzzy
// searchers record their matches through the context into a map that is
// stored on the returned searcher. When the context carries a synonym map for
// the field, the matching synonyms are recorded in the same map.
//
// Every searcher opened so far is closed before an error is returned. Errors
// wrap the underlying cause.
func NewMultiPhraseSearcher(ctx context.Context, indexReader index.IndexReader, terms [][]string,
	fuzziness int, autoFuzzy bool, field string, boost float64, options search.SearcherOptions) (*PhraseSearcher, error) {
	options.IncludeTermVectors = true

	// The following logic checks if fuzziness is enabled.
	// Fuzziness is considered enabled if either:
	// a. `fuzziness` is greater than 0, or
	// b. `autoFuzzy` is set to true.
	// if both conditions are true, `autoFuzzy` takes precedence.
	// If enabled, a map will be created to store the matches for fuzzy terms.
	fuzzinessEnabled := autoFuzzy || fuzziness > 0
	var fuzzyTermMatches map[string][]string
	if fuzzinessEnabled {
		fuzzyTermMatches = make(map[string][]string)
		if ctx == nil {
			// context.WithValue panics on a nil parent
			ctx = context.Background()
		}
		ctx = context.WithValue(ctx, search.FuzzyMatchPhraseKey, fuzzyTermMatches)
	}

	// in case of fuzzy multi-phrase, phrase and match-phrase queries we hardcode the
	// prefix length to 0, as setting a per word matching prefix length would not
	// make sense from a user perspective.
	newTermSearcher := func(term string) (search.Searcher, error) {
		switch {
		case autoFuzzy:
			return NewAutoFuzzySearcher(ctx, indexReader, term, 0, field, boost, options)
		case fuzzinessEnabled:
			return NewFuzzySearcher(ctx, indexReader, term, 0, fuzziness, field, boost, options)
		default:
			return NewTermSearcher(ctx, indexReader, term, field, boost, options)
		}
	}

	// closeAll releases searchers on an error path; close errors are ignored
	// because the original failure is the one worth reporting
	closeAll := func(searchers []search.Searcher) {
		for _, s := range searchers {
			_ = s.Close()
		}
	}

	var termPositionSearchers []search.Searcher
	for _, termPos := range terms {
		switch {
		case len(termPos) == 1 && termPos[0] != "":
			// single term
			ts, err := newTermSearcher(termPos[0])
			if err != nil {
				closeAll(termPositionSearchers)
				return nil, fmt.Errorf("phrase searcher error building term searcher: %w", err)
			}
			termPositionSearchers = append(termPositionSearchers, ts)

		case len(termPos) > 1:
			// multiple terms
			var termSearchers []search.Searcher
			for _, term := range termPos {
				if term == "" {
					continue
				}
				ts, err := newTermSearcher(term)
				if err != nil {
					// also release the searchers already opened for this
					// position; they are not yet part of termPositionSearchers
					closeAll(termSearchers)
					closeAll(termPositionSearchers)
					return nil, fmt.Errorf("phrase searcher error building term searcher: %w", err)
				}
				termSearchers = append(termSearchers, ts)
			}
			disjunction, err := NewDisjunctionSearcher(ctx, indexReader, termSearchers, 1, options)
			if err != nil {
				closeAll(termSearchers)
				closeAll(termPositionSearchers)
				return nil, fmt.Errorf("phrase searcher error building term position disjunction searcher: %w", err)
			}
			termPositionSearchers = append(termPositionSearchers, disjunction)
		}
	}

	if ctx != nil {
		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
			if ts, exists := fts[field]; exists {
				if fuzzinessEnabled {
					// extend every fuzzy match list with the synonyms of the
					// query term and of each of its fuzzy matches
					for term, fuzzyTerms := range fuzzyTermMatches {
						fuzzySynonymTerms := make([]string, 0, len(fuzzyTerms))
						if s, found := ts[term]; found {
							fuzzySynonymTerms = append(fuzzySynonymTerms, s...)
						}
						for _, fuzzyTerm := range fuzzyTerms {
							if fuzzyTerm == term {
								continue
							}
							if s, found := ts[fuzzyTerm]; found {
								fuzzySynonymTerms = append(fuzzySynonymTerms, s...)
							}
						}
						if len(fuzzySynonymTerms) > 0 {
							fuzzyTermMatches[term] = append(fuzzyTermMatches[term], fuzzySynonymTerms...)
						}
					}
				} else {
					// no fuzziness: the map only carries synonyms
					for _, termPos := range terms {
						for _, term := range termPos {
							if s, found := ts[term]; found {
								if fuzzyTermMatches == nil {
									fuzzyTermMatches = make(map[string][]string)
								}
								fuzzyTermMatches[term] = s
							}
						}
					}
				}
			}
		}
	}

	mustSearcher, err := NewConjunctionSearcher(ctx, indexReader, termPositionSearchers, options)
	if err != nil {
		closeAll(termPositionSearchers)
		return nil, fmt.Errorf("phrase searcher error building conjunction searcher: %w", err)
	}

	// build our searcher
	rv := PhraseSearcher{
		mustSearcher:     mustSearcher,
		terms:            terms,
		fuzzyTermMatches: fuzzyTermMatches,
	}
	rv.computeQueryNorm()
	return &rv, nil
}
// computeQueryNorm derives the query norm from the weight reported by the
// underlying must searcher, stores it on the PhraseSearcher and then hands
// the same norm back down to that searcher.
func (s *PhraseSearcher) computeQueryNorm() {
	must := s.mustSearcher

	// accumulate the sum of squared weights (only one contributor here)
	total := 0.0
	if must != nil {
		total += must.Weight()
	}

	// the norm is the inverse square root of that sum
	s.queryNorm = 1.0 / math.Sqrt(total)

	// let the downstream searcher know about the norm
	if must != nil {
		must.SetQueryNorm(s.queryNorm)
	}
}
// initSearchers loads the first candidate from the must searcher and, if
// that succeeds, flags the PhraseSearcher as initialized.
func (s *PhraseSearcher) initSearchers(ctx *search.SearchContext) error {
	if err := s.advanceNextMust(ctx); err != nil {
		return err
	}
	s.initialized = true
	return nil
}
// advanceNextMust moves s.currMust to the next candidate produced by the
// must searcher. A candidate still held in s.currMust is handed back to the
// DocumentMatchPool first. It does nothing when there is no must searcher.
func (s *PhraseSearcher) advanceNextMust(ctx *search.SearchContext) error {
	if s.mustSearcher == nil {
		return nil
	}
	if s.currMust != nil {
		ctx.DocumentMatchPool.Put(s.currMust)
	}
	next, err := s.mustSearcher.Next(ctx)
	s.currMust = next
	return err
}
// Weight returns the weight reported by the underlying must searcher.
func (s *PhraseSearcher) Weight() float64 {
return s.mustSearcher.Weight()
}
// SetQueryNorm forwards the supplied query norm to the underlying must
// searcher.
func (s *PhraseSearcher) SetQueryNorm(qnorm float64) {
s.mustSearcher.SetQueryNorm(qnorm)
}
// Next returns the next document that satisfies the phrase constraints, or
// (nil, nil) once the must searcher has no more candidates.
func (s *PhraseSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	for s.currMust != nil {
		// test the current candidate against the phrase constraints
		hit := s.checkCurrMustMatch(ctx)

		// move on regardless of the outcome, so that both the next loop
		// iteration and the next call to Next() start from a fresh candidate
		if err := s.advanceNextMust(ctx); err != nil {
			return nil, err
		}

		// a candidate that satisfied the constraints is returned as a hit
		if hit != nil {
			return hit, nil
		}
	}

	return nil, nil
}
// checkCurrMustMatch decides whether the DocumentMatch held in s.currMust
// (which already satisfies the pre-condition searcher) also satisfies the
// phrase constraints. On success the DocumentMatch is detached from
// s.currMust and returned with its FieldTermLocations set to the phrase
// matches; otherwise nil is returned.
func (s *PhraseSearcher) checkCurrMustMatch(ctx *search.SearchContext) *search.DocumentMatch {
	curr := s.currMust

	s.locations = curr.Complete(s.locations)

	byField := curr.Locations
	curr.Locations = nil

	matched := curr.FieldTermLocations

	// results are normally expected in a single field, but nothing here
	// relies on that; a phrase can only be satisfied within one field, so
	// every field is checked on its own
	for field, tlm := range byField {
		matched = s.checkCurrMustMatchField(ctx, field, tlm, matched)
	}

	if len(matched) == 0 {
		return nil
	}

	// hand the match over to the caller
	s.currMust = nil
	curr.FieldTermLocations = matched
	return curr
}
// checkCurrMustMatchField decides whether a single field of the currMust
// DocumentMatch Locations satisfies the phrase constraints (possibly more
// than once). Every term location that takes part in a matching phrase is
// appended to ftls, and the extended slice is returned.
func (s *PhraseSearcher) checkCurrMustMatchField(ctx *search.SearchContext,
	field string, tlm search.TermLocationMap,
	ftls []search.FieldTermLocation) []search.FieldTermLocation {
	if s.path == nil {
		s.path = make(phrasePath, 0, len(s.terms))
	}

	candidates := tlm
	if s.fuzzyTermMatches != nil {
		// the term location map must be expanded so that every phrase term
		// also carries the locations of its recorded alternates.
		// Example - term is "foo" and its matches are "foo", "fool", "food"
		// the non expanded tlm is:
		//   foo  -> Locations[foo]
		//   fool -> Locations[fool]
		//   food -> Locations[food]
		// the expanded tlm is:
		//   foo  -> [Locations[foo], Locations[fool], Locations[food]]
		merged := make(search.TermLocationMap)
		s.expandFuzzyMatches(tlm, merged)
		candidates = merged
	}

	s.paths = findPhrasePaths(0, nil, s.terms, candidates, s.path[:0], 0, s.paths[:0])

	for _, path := range s.paths {
		for i := range path {
			part := &path[i]
			hit := search.FieldTermLocation{
				Field: field,
				Term:  part.term,
				Location: search.Location{
					Pos:            part.loc.Pos,
					Start:          part.loc.Start,
					End:            part.loc.End,
					ArrayPositions: part.loc.ArrayPositions,
				},
			}
			ftls = append(ftls, hit)
		}
	}

	return ftls
}
// expandFuzzyMatches fills expandedTlm with one entry per key of
// s.fuzzyTermMatches: the locations tlm holds for the key itself followed by
// the locations of each of its recorded matches (the key is not added twice).
func (s *PhraseSearcher) expandFuzzyMatches(tlm search.TermLocationMap, expandedTlm search.TermLocationMap) {
	for term, alternates := range s.fuzzyTermMatches {
		merged := tlm[term]
		for _, alt := range alternates {
			if alt != term {
				merged = append(merged, tlm[alt]...)
			}
		}
		expandedTlm[term] = merged
	}
}
// phrasePart is one element of a phrase match: a term together with the
// location at which that term was found.
type phrasePart struct {
// term is the matched term
term string
// loc is the location of this occurrence of term
loc *search.Location
}
// String renders the part as "[term location]", for debugging and test
// failure output.
func (p *phrasePart) String() string {
return fmt.Sprintf("[%s %v]", p.term, p.loc)
}
// phrasePath is an ordered list of phrase parts; findPhrasePaths produces
// one phrasePath per phrase match it finds.
type phrasePath []phrasePart
// MergeInto appends the location of every part of the path to the entry for
// that part's term in the supplied TermLocationMap.
func (p phrasePath) MergeInto(in search.TermLocationMap) {
	for i := range p {
		part := &p[i]
		in[part.term] = append(in[part.term], part.loc)
	}
}
// String renders the path as a bracketed, comma separated list of its parts.
func (p phrasePath) String() string {
	out := "["
	for idx := range p {
		if idx != 0 {
			out += ", "
		}
		out += p[idx].String()
	}
	return out + "]"
}
// findPhrasePaths is a function to identify phrase matches from a set
// of known term locations. it is recursive so care must be taken with
// arguments and return values.
//
// prevPos - the previous location, 0 on first invocation
//
// ap - array positions of the first candidate phrase part to
// which further recursive phrase parts must match,
// nil on initial invocation or when there are no array positions
//
// phraseTerms - slice containing the phrase terms,
// may contain empty string as placeholder (don't care)
//
// tlm - the Term Location Map containing all relevant term locations
//
// p - the current path being explored (appended to in recursive calls)
// this is the primary state being built during the traversal
//
// remainingSlop - amount of sloppiness that's allowed, which is the
// sum of the editDistances from each matching phrase part, where 0 means no
// sloppiness allowed (all editDistances must be 0), decremented during recursion
//
// rv - the final result being appended to by all the recursive calls
//
// returns slice of paths, or nil if invocation did not find any successful paths
func findPhrasePaths(prevPos uint64, ap search.ArrayPositions, phraseTerms [][]string,
tlm search.TermLocationMap, p phrasePath, remainingSlop int, rv []phrasePath) []phrasePath {
// no more terms
if len(phraseTerms) < 1 {
// snapshot or copy the recursively built phrasePath p and
// append it to the rv, also optimizing by checking if next
// phrasePath item in the rv (which we're about to overwrite)
// is available for reuse
var pcopy phrasePath
if len(rv) < cap(rv) {
pcopy = rv[:len(rv)+1][len(rv)][:0]
}
return append(rv, append(pcopy, p...))
}
// car is the set of alternatives for the current phrase position,
// cdr is everything that still has to match after it
car := phraseTerms[0]
cdr := phraseTerms[1:]
// empty term is treated as match (continue)
if len(car) == 0 || (len(car) == 1 && car[0] == "") {
nextPos := prevPos + 1
if prevPos == 0 {
// if prevPos was 0, don't set it to 1 (as that's not a real abs pos)
nextPos = 0 // don't advance nextPos if prevPos was 0
}
return findPhrasePaths(nextPos, ap, cdr, tlm, p, remainingSlop, rv)
}
// locations for this term
for _, carTerm := range car {
locations := tlm[carTerm]
LOCATIONS_LOOP:
for _, loc := range locations {
if prevPos != 0 && !loc.ArrayPositions.Equals(ap) {
// if the array positions are wrong, can't match, try next location
continue
}
// compute distance from previous phrase term
// (stays 0 while no previous position has been fixed yet)
dist := 0
if prevPos != 0 {
dist = editDistance(prevPos+1, loc.Pos)
}
// if enough slop remaining, continue recursively
if prevPos == 0 || (remainingSlop-dist) >= 0 {
// skip if we've already used this term+loc already
// (compares the location pointer, not the location contents)
for _, ppart := range p {
if ppart.term == carTerm && ppart.loc == loc {
continue LOCATIONS_LOOP
}
}
// this location works, add it to the path (but not for empty term)
px := append(p, phrasePart{term: carTerm, loc: loc})
rv = findPhrasePaths(loc.Pos, loc.ArrayPositions, cdr, tlm, px, remainingSlop-dist, rv)
}
}
}
return rv
}
// editDistance returns the absolute difference between two positions.
func editDistance(p1, p2 uint64) int {
	delta := int(p1 - p2)
	if delta >= 0 {
		return delta
	}
	return -delta
}
// Advance returns the next phrase match whose candidate is at or after ID.
// When the current candidate is already at or past ID this is the same as
// calling Next; otherwise the must searcher is advanced to ID first.
// (nil, nil) is returned when there is no current candidate.
func (s *PhraseSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	if !s.initialized {
		if err := s.initSearchers(ctx); err != nil {
			return nil, err
		}
	}

	// no current candidate, nothing to advance from
	if s.currMust == nil {
		return nil, nil
	}

	// already positioned at or beyond the requested ID
	if s.currMust.IndexInternalID.Compare(ID) >= 0 {
		return s.Next(ctx)
	}

	// the current candidate is before ID: recycle it and jump ahead
	ctx.DocumentMatchPool.Put(s.currMust)
	next, err := s.mustSearcher.Advance(ctx, ID)
	s.currMust = next
	if err != nil {
		return nil, err
	}
	return s.Next(ctx)
}
// Count returns the count reported by the underlying must searcher. That
// searcher only applies the pre-condition, not the phrase constraints, so
// the value is an upper bound rather than the exact number of phrase hits.
func (s *PhraseSearcher) Count() uint64 {
// for now return a worst case
return s.mustSearcher.Count()
}
// Close closes the underlying must searcher, if there is one, and returns
// whatever error that produced.
func (s *PhraseSearcher) Close() error {
	if s.mustSearcher == nil {
		return nil
	}
	return s.mustSearcher.Close()
}
// Min always returns 0 for a PhraseSearcher.
func (s *PhraseSearcher) Min() int {
return 0
}
// DocumentMatchPoolSize returns the pool size required by the underlying
// must searcher plus one additional DocumentMatch for this searcher.
func (s *PhraseSearcher) DocumentMatchPoolSize() int {
return s.mustSearcher.DocumentMatchPoolSize() + 1
}
================================================
FILE: search/searcher/search_phrase_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"reflect"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestPhraseSearch runs the exact two-term phrase "angst beer" against the
// "desc" field of the shared twoDocIndex fixture and verifies the ID, score
// and term locations of the single expected hit.
func TestPhraseSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// nothing below can work without a reader, so stop right away
		// instead of carrying on with an unusable one
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}
	phraseSearcher, err := NewPhraseSearcher(context.TODO(), twoDocIndexReader, []string{"angst", "beer"}, 0, false, "desc", 1.0, soptions)
	if err != nil {
		t.Fatal(err)
	}

	tests := []struct {
		searcher   search.Searcher
		results    []*search.DocumentMatch
		locations  map[string]map[string][]search.Location
		fieldterms [][2]string
	}{
		{
			searcher: phraseSearcher,
			results: []*search.DocumentMatch{
				{
					IndexInternalID: index.IndexInternalID("2"),
					Score:           1.0807601687084403,
				},
			},
			locations:  map[string]map[string][]search.Location{"desc": {"beer": {{Pos: 2, Start: 6, End: 10}}, "angst": {{Pos: 1, Start: 0, End: 5}}}},
			fieldterms: [][2]string{{"desc", "beer"}, {"desc", "angst"}},
		},
	}

	for testIndex, test := range tests {
		// bind the searcher explicitly so the deferred close does not depend
		// on how the loop variable is captured
		searcher := test.searcher
		defer func() {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}()

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(test.searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := test.searcher.Next(ctx)
		i := 0
		for err == nil && next != nil {
			next.Complete(nil)
			if i < len(test.results) {
				if !next.IndexInternalID.Equals(test.results[i].IndexInternalID) {
					t.Errorf("expected result %d to have id %s got %s for test %d\n", i, test.results[i].IndexInternalID, next.IndexInternalID, testIndex)
				}
				if next.Score != test.results[i].Score {
					t.Errorf("expected result %d to have score %v got %v for test %d\n", i, test.results[i].Score, next.Score, testIndex)
					t.Logf("scoring explanation: %s\n", next.Expl)
				}
				for _, ft := range test.fieldterms {
					locs := next.Locations[ft[0]][ft[1]]
					explocs := test.locations[ft[0]][ft[1]]
					if len(explocs) != len(locs) {
						t.Fatalf("expected result %d to have %d Locations (%#v) but got %d (%#v) for test %d with field %q and term %q\n", i, len(explocs), explocs, len(locs), locs, testIndex, ft[0], ft[1])
					}
					for ind, exploc := range explocs {
						if !reflect.DeepEqual(*locs[ind], exploc) {
							t.Errorf("expected result %d to have Location %v got %v for test %d\n", i, exploc, locs[ind], testIndex)
						}
					}
				}
			}
			ctx.DocumentMatchPool.Put(next)
			next, err = test.searcher.Next(ctx)
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.results) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.results), i, testIndex)
		}
	}
}
// TestMultiPhraseSearch checks that a multi-phrase query, where a phrase
// position may be satisfied by any of several terms, returns the expected
// document IDs from the shared twoDocIndex fixture.
func TestMultiPhraseSearch(t *testing.T) {
	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}

	tests := []struct {
		phrase [][]string
		docids [][]byte
	}{
		{
			phrase: [][]string{{"angst", "what"}, {"beer"}},
			docids: [][]byte{[]byte("2")},
		},
	}

	for i, test := range tests {
		reader, err := twoDocIndex.Reader()
		if err != nil {
			// the reader is required by everything below
			t.Fatal(err)
		}

		searcher, err := NewMultiPhraseSearcher(context.TODO(), reader, test.phrase, 0, false, "desc", 1.0, soptions)
		if err != nil {
			// a failed constructor leaves no usable searcher; continuing
			// would only end in a nil dereference
			t.Fatal(err)
		}

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		var actualIds [][]byte
		for err == nil && next != nil {
			actualIds = append(actualIds, next.IndexInternalID)
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, i)
		}
		if !reflect.DeepEqual(test.docids, actualIds) {
			t.Fatalf("expected ids: %v, got %v", test.docids, actualIds)
		}

		err = searcher.Close()
		if err != nil {
			t.Error(err)
		}
		err = reader.Close()
		if err != nil {
			t.Error(err)
		}
	}
}
// TestFuzzyMultiPhraseSearch checks multi-phrase queries with a non-zero
// fuzziness against the shared twoDocIndex fixture, including phrases with
// empty ("don't care") positions, and compares the returned document IDs.
func TestFuzzyMultiPhraseSearch(t *testing.T) {
	soptions := search.SearcherOptions{Explain: true, IncludeTermVectors: true}

	tests := []struct {
		mphrase   [][]string
		docids    [][]byte
		fuzziness int
		prefix    int
	}{
		{
			mphrase:   [][]string{{"pale", "anger"}, {"best"}, {"colon", "porch"}},
			docids:    [][]byte{[]byte("2"), []byte("3")},
			fuzziness: 2,
		},
		{
			mphrase:   [][]string{{"pale", "anger"}, {}, {"colon", "porch", "could"}},
			docids:    nil,
			fuzziness: 1,
		},
		{
			mphrase:   [][]string{{"app"}, {"best"}, {"volume"}},
			docids:    [][]byte{[]byte("3")},
			fuzziness: 2,
		},
		{
			mphrase:   [][]string{{"anger", "pale", "bar"}, {"beard"}, {}, {}},
			docids:    [][]byte{[]byte("1"), []byte("2"), []byte("3"), []byte("4")},
			fuzziness: 2,
		},
		{
			mphrase:   [][]string{{"anger", "pale", "bar"}, {}, {"beard"}, {}},
			docids:    [][]byte{[]byte("1"), []byte("4")},
			fuzziness: 2,
		},
	}

	for i, test := range tests {
		reader, err := twoDocIndex.Reader()
		if err != nil {
			// the reader is required by everything below
			t.Fatal(err)
		}

		searcher, err := NewMultiPhraseSearcher(context.TODO(), reader, test.mphrase, test.fuzziness, false, "desc", 1.0, soptions)
		if err != nil {
			// a failed constructor leaves no usable searcher; continuing
			// would only end in a nil dereference
			t.Fatal(err)
		}

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		var actualIds [][]byte
		for err == nil && next != nil {
			actualIds = append(actualIds, next.IndexInternalID)
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, i)
		}
		if !reflect.DeepEqual(test.docids, actualIds) {
			t.Fatalf("expected ids: %v, got %v", test.docids, actualIds)
		}

		err = searcher.Close()
		if err != nil {
			t.Error(err)
		}
		err = reader.Close()
		if err != nil {
			t.Error(err)
		}
	}
}
// TestFindPhrasePaths exercises findPhrasePaths with zero slop on small,
// hand-built term location maps: exact matches, missing or misplaced terms,
// repeated matches and empty-string placeholders at the start, middle and
// end of the phrase.
func TestFindPhrasePaths(t *testing.T) {
tests := []struct {
phrase [][]string
tlm search.TermLocationMap
paths []phrasePath
}{
// simplest matching case
{
phrase: [][]string{{"cat"}, {"dog"}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
},
},
"dog": search.Locations{
&search.Location{
Pos: 2,
},
},
},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 2}},
},
},
},
// second term missing, no match
{
phrase: [][]string{{"cat"}, {"dog"}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
},
},
},
paths: nil,
},
// second term exists but in wrong position
{
phrase: [][]string{{"cat"}, {"dog"}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
},
},
"dog": search.Locations{
&search.Location{
Pos: 3,
},
},
},
paths: nil,
},
// matches multiple times
{
phrase: [][]string{{"cat"}, {"dog"}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
},
&search.Location{
Pos: 8,
},
},
"dog": search.Locations{
&search.Location{
Pos: 2,
},
&search.Location{
Pos: 9,
},
},
},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 2}},
},
{
phrasePart{"cat", &search.Location{Pos: 8}},
phrasePart{"dog", &search.Location{Pos: 9}},
},
},
},
// match over gaps
{
phrase: [][]string{{"cat"}, {""}, {"dog"}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
},
},
"dog": search.Locations{
&search.Location{
Pos: 3,
},
},
},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 3}},
},
},
},
// match with leading ""
{
phrase: [][]string{{""}, {"cat"}, {"dog"}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 2,
},
},
"dog": search.Locations{
&search.Location{
Pos: 3,
},
},
},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 2}},
phrasePart{"dog", &search.Location{Pos: 3}},
},
},
},
// match with trailing ""
{
phrase: [][]string{{"cat"}, {"dog"}, {""}},
tlm: search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 2,
},
},
"dog": search.Locations{
&search.Location{
Pos: 3,
},
},
},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 2}},
phrasePart{"dog", &search.Location{Pos: 3}},
},
},
},
}
// every case starts from a fresh traversal: no previous position, no
// array positions, no partial path, no slop and no prior results
for i, test := range tests {
actualPaths := findPhrasePaths(0, nil, test.phrase, test.tlm, nil, 0, nil)
if !reflect.DeepEqual(actualPaths, test.paths) {
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
}
}
}
// TestFindPhrasePathsSloppy exercises findPhrasePaths with a non-zero slop
// budget. Cases that do not set their own tlm run against the shared
// "one two three four five" map declared at the top of the function.
func TestFindPhrasePathsSloppy(t *testing.T) {
// default term location map: one two three four five
tlm := search.TermLocationMap{
"one": search.Locations{
&search.Location{
Pos: 1,
},
},
"two": search.Locations{
&search.Location{
Pos: 2,
},
},
"three": search.Locations{
&search.Location{
Pos: 3,
},
},
"four": search.Locations{
&search.Location{
Pos: 4,
},
},
"five": search.Locations{
&search.Location{
Pos: 5,
},
},
}
tests := []struct {
phrase [][]string
paths []phrasePath
slop int
tlm search.TermLocationMap
}{
// no match
{
phrase: [][]string{{"one"}, {"five"}},
slop: 2,
},
// should match
{
phrase: [][]string{{"one"}, {"five"}},
slop: 3,
paths: []phrasePath{
{
phrasePart{"one", &search.Location{Pos: 1}},
phrasePart{"five", &search.Location{Pos: 5}},
},
},
},
// slop 0 finds exact match
{
phrase: [][]string{{"four"}, {"five"}},
slop: 0,
paths: []phrasePath{
{
phrasePart{"four", &search.Location{Pos: 4}},
phrasePart{"five", &search.Location{Pos: 5}},
},
},
},
// slop 0 does not find exact match (reversed)
{
phrase: [][]string{{"two"}, {"one"}},
slop: 0,
},
// slop 1 finds exact match
{
phrase: [][]string{{"one"}, {"two"}},
slop: 1,
paths: []phrasePath{
{
phrasePart{"one", &search.Location{Pos: 1}},
phrasePart{"two", &search.Location{Pos: 2}},
},
},
},
// slop 1 *still* does not find exact match (reversed) requires at least 2
{
phrase: [][]string{{"two"}, {"one"}},
slop: 1,
},
// slop 2 does find exact match reversed
{
phrase: [][]string{{"two"}, {"one"}},
slop: 2,
paths: []phrasePath{
{
phrasePart{"two", &search.Location{Pos: 2}},
phrasePart{"one", &search.Location{Pos: 1}},
},
},
},
// slop 2 not enough for this
{
phrase: [][]string{{"three"}, {"one"}},
slop: 2,
},
// slop should be cumulative
{
phrase: [][]string{{"one"}, {"three"}, {"five"}},
slop: 2,
paths: []phrasePath{
{
phrasePart{"one", &search.Location{Pos: 1}},
phrasePart{"three", &search.Location{Pos: 3}},
phrasePart{"five", &search.Location{Pos: 5}},
},
},
},
// should require 6
{
phrase: [][]string{{"five"}, {"three"}, {"one"}},
slop: 5,
},
// so lets try 6
{
phrase: [][]string{{"five"}, {"three"}, {"one"}},
slop: 6,
paths: []phrasePath{
{
phrasePart{"five", &search.Location{Pos: 5}},
phrasePart{"three", &search.Location{Pos: 3}},
phrasePart{"one", &search.Location{Pos: 1}},
},
},
},
// test an append() related edge case, where append()'s
// current behavior needs to be called 3 times starting from a
// nil slice before it grows to a slice with extra capacity --
// hence, 3 initial terms of ark, bat, cat
{
phrase: [][]string{
{"ark"}, {"bat"}, {"cat"}, {"dog"},
},
slop: 1,
paths: []phrasePath{
{
phrasePart{"ark", &search.Location{Pos: 1}},
phrasePart{"bat", &search.Location{Pos: 2}},
phrasePart{"cat", &search.Location{Pos: 3}},
phrasePart{"dog", &search.Location{Pos: 4}},
},
{
phrasePart{"ark", &search.Location{Pos: 1}},
phrasePart{"bat", &search.Location{Pos: 2}},
phrasePart{"cat", &search.Location{Pos: 3}},
phrasePart{"dog", &search.Location{Pos: 5}},
},
},
tlm: search.TermLocationMap{ // ark bat cat dog dog
"ark": search.Locations{
&search.Location{Pos: 1},
},
"bat": search.Locations{
&search.Location{Pos: 2},
},
"cat": search.Locations{
&search.Location{Pos: 3},
},
"dog": search.Locations{
&search.Location{Pos: 4},
&search.Location{Pos: 5},
},
},
},
// test that we don't see multiple hits from the same location
{
phrase: [][]string{
{"cat"}, {"dog"}, {"dog"},
},
slop: 1,
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 2}},
phrasePart{"dog", &search.Location{Pos: 3}},
},
},
tlm: search.TermLocationMap{ // cat dog dog
"cat": search.Locations{
&search.Location{Pos: 1},
},
"dog": search.Locations{
&search.Location{Pos: 2},
&search.Location{Pos: 3},
},
},
},
// with a generous slop every cat location pairs up with every dog
// location, in either order
{
phrase: [][]string{
{"cat"}, {"dog"},
},
slop: 10,
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 2}},
},
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 4}},
},
{
phrasePart{"cat", &search.Location{Pos: 3}},
phrasePart{"dog", &search.Location{Pos: 2}},
},
{
phrasePart{"cat", &search.Location{Pos: 3}},
phrasePart{"dog", &search.Location{Pos: 4}},
},
},
tlm: search.TermLocationMap{ // cat dog cat dog
"cat": search.Locations{
&search.Location{Pos: 1},
&search.Location{Pos: 3},
},
"dog": search.Locations{
&search.Location{Pos: 2},
&search.Location{Pos: 4},
},
},
},
}
for i, test := range tests {
// fall back to the shared map when the case does not bring its own
tlmToUse := test.tlm
if tlmToUse == nil {
tlmToUse = tlm
}
actualPaths := findPhrasePaths(0, nil, test.phrase, tlmToUse, nil, test.slop, nil)
if !reflect.DeepEqual(actualPaths, test.paths) {
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
}
}
}
// TestFindPhrasePathsSloppyPalyndrome exercises findPhrasePaths with slop on
// a palindromic term sequence (one two three two one), where a phrase can be
// matched both forwards and backwards around the middle term.
func TestFindPhrasePathsSloppyPalyndrome(t *testing.T) {
// term location map: one two three two one
tlm := search.TermLocationMap{
"one": search.Locations{
&search.Location{
Pos: 1,
},
&search.Location{
Pos: 5,
},
},
"two": search.Locations{
&search.Location{
Pos: 2,
},
&search.Location{
Pos: 4,
},
},
"three": search.Locations{
&search.Location{
Pos: 3,
},
},
}
tests := []struct {
phrase [][]string
paths []phrasePath
slop int
}{
// search non palindrome, exact match
{
phrase: [][]string{{"two"}, {"three"}},
slop: 0,
paths: []phrasePath{
{
phrasePart{"two", &search.Location{Pos: 2}},
phrasePart{"three", &search.Location{Pos: 3}},
},
},
},
// same with slop 2 (not required) (find it twice)
{
phrase: [][]string{{"two"}, {"three"}},
slop: 2,
paths: []phrasePath{
{
phrasePart{"two", &search.Location{Pos: 2}},
phrasePart{"three", &search.Location{Pos: 3}},
},
{
phrasePart{"two", &search.Location{Pos: 4}},
phrasePart{"three", &search.Location{Pos: 3}},
},
},
},
// palindrome reversed
{
phrase: [][]string{{"three"}, {"two"}},
slop: 2,
paths: []phrasePath{
{
phrasePart{"three", &search.Location{Pos: 3}},
phrasePart{"two", &search.Location{Pos: 2}},
},
{
phrasePart{"three", &search.Location{Pos: 3}},
phrasePart{"two", &search.Location{Pos: 4}},
},
},
},
}
for i, test := range tests {
actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, test.slop, nil)
if !reflect.DeepEqual(actualPaths, test.paths) {
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
}
}
}
// TestFindMultiPhrasePaths exercises findPhrasePaths with zero slop on
// phrases whose positions offer several alternative terms, including gap
// positions written as {""}, {} and nil.
func TestFindMultiPhrasePaths(t *testing.T) {
// term location map: cat dog frog
tlm := search.TermLocationMap{
"cat": search.Locations{
&search.Location{
Pos: 1,
},
},
"dog": search.Locations{
&search.Location{
Pos: 2,
},
},
"frog": search.Locations{
&search.Location{
Pos: 3,
},
},
}
tests := []struct {
phrase [][]string
paths []phrasePath
}{
// simplest, one of two possible terms matches
{
phrase: [][]string{{"cat", "rat"}, {"dog"}},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 2}},
},
},
},
// two possible terms, neither work
{
phrase: [][]string{{"cat", "rat"}, {"chicken"}},
},
// two possible terms, one works, but out of position with next
{
phrase: [][]string{{"cat", "rat"}, {"frog"}},
},
// matches multiple times, with different pairing
{
phrase: [][]string{{"cat", "dog"}, {"dog", "frog"}},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"dog", &search.Location{Pos: 2}},
},
{
phrasePart{"dog", &search.Location{Pos: 2}},
phrasePart{"frog", &search.Location{Pos: 3}},
},
},
},
// multi-match over a gap
{
phrase: [][]string{{"cat", "rat"}, {""}, {"frog"}},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"frog", &search.Location{Pos: 3}},
},
},
},
// multi-match over a gap (same as before, but with empty term list)
{
phrase: [][]string{{"cat", "rat"}, {}, {"frog"}},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"frog", &search.Location{Pos: 3}},
},
},
},
// multi-match over a gap (same once again, but nil term list)
{
phrase: [][]string{{"cat", "rat"}, nil, {"frog"}},
paths: []phrasePath{
{
phrasePart{"cat", &search.Location{Pos: 1}},
phrasePart{"frog", &search.Location{Pos: 3}},
},
},
},
}
for i, test := range tests {
actualPaths := findPhrasePaths(0, nil, test.phrase, tlm, nil, 0, nil)
if !reflect.DeepEqual(actualPaths, test.paths) {
t.Fatalf("expected: %v got %v for test %d", test.paths, actualPaths, i)
}
}
}
================================================
FILE: search/searcher/search_regexp.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"regexp"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// The Regexp interface defines the subset of the regexp.Regexp API
// methods that are used by bleve indexes, allowing callers to pass in
// alternate implementations.
type Regexp interface {
// FindStringIndex returns the position of a match of the pattern in s;
// findRegexpCandidateTerms treats a nil result as "no match" and
// otherwise reads loc[0] and loc[1] as the start and end of the match.
FindStringIndex(s string) (loc []int)
// LiteralPrefix returns a literal prefix of the pattern; NewRegexpSearcher
// treats complete == true as "the pattern is just this literal term".
LiteralPrefix() (prefix string, complete bool)
// String returns a textual form of the pattern.
String() string
}
// NewRegexpStringSearcher is similar to NewRegexpSearcher, but
// additionally optimizes for index readers that handle regexp's.
//
// When indexReader does not implement index.IndexReaderRegexp the pattern is
// compiled with the standard regexp package and handed to NewRegexpSearcher.
// Otherwise the reader's regexp-aware field dictionary is enumerated to
// collect the candidate terms, synonym terms found in ctx for the field that
// also match the pattern are added, and a multi-term searcher over the
// resulting set is returned.
func NewRegexpStringSearcher(ctx context.Context, indexReader index.IndexReader, pattern string,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	ir, ok := indexReader.(index.IndexReaderRegexp)
	if !ok {
		r, err := regexp.Compile(pattern)
		if err != nil {
			return nil, err
		}

		return NewRegexpSearcher(ctx, indexReader, r, field, boost, options)
	}

	fieldDict, a, err := ir.FieldDictRegexpAutomaton(field, pattern)
	if err != nil {
		return nil, err
	}
	// NOTE(review): err is a local here, not a named result, so assigning to
	// it in this deferred function does not change the returned values.
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	var termSet = make(map[string]struct{})
	var candidateTerms []string

	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		if _, exists := termSet[tfd.Term]; !exists {
			termSet[tfd.Term] = struct{}{}
			candidateTerms = append(candidateTerms, tfd.Term)
		}
		// always move on to the next entry; advancing only for unseen terms
		// would spin forever on the first repeated term
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	if ctx != nil {
		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
			if ts, exists := fts[field]; exists {
				// add synonym map keys for this field that match the pattern
				// and were not already found in the dictionary
				for term := range ts {
					if _, exists := termSet[term]; exists {
						continue
					}
					if a.MatchesRegex(term) {
						termSet[term] = struct{}{}
						candidateTerms = append(candidateTerms, term)
					}
				}
			}
		}
	}

	return NewMultiTermSearcher(ctx, indexReader, candidateTerms, field, boost,
		options, true)
}
// NewRegexpSearcher creates a searcher which will match documents that
// contain terms which match the pattern regexp. The match must be EXACT
// matching the entire term. The provided regexp SHOULD NOT start with ^
// or end with $ as this can interfere with the implementation. Separately,
// matches will be checked to ensure they match the entire term.
func NewRegexpSearcher(ctx context.Context, indexReader index.IndexReader, pattern Regexp,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	var (
		terms         []string
		dictBytesRead uint64
	)

	if literal, complete := pattern.LiteralPrefix(); complete {
		// the pattern is a plain literal, so it is the only candidate
		terms = []string{literal}
	} else {
		found, err := findRegexpCandidateTerms(indexReader, pattern, field, literal)
		if err != nil {
			return nil, err
		}
		if found != nil {
			terms = found.candidates
			dictBytesRead = found.bytesRead
		}
	}

	// account for the dictionary bytes read while collecting candidates
	if ctx != nil {
		reportIOStats(ctx, dictBytesRead)
		search.RecordSearchCost(ctx, search.AddM, dictBytesRead)
	}

	return NewMultiTermSearcher(ctx, indexReader, terms, field, boost,
		options, true)
}
// regexpCandidates is the result of findRegexpCandidateTerms.
type regexpCandidates struct {
// candidates holds the dictionary terms that the pattern matched in full
candidates []string
// bytesRead is the value reported by the field dictionary's BytesRead
// once the enumeration has finished
bytesRead uint64
}
func findRegexpCandidateTerms(indexReader index.IndexReader,
pattern Regexp, field, prefixTerm string) (rv *regexpCandidates, err error) {
rv = ®expCandidates{
candidates: make([]string, 0),
}
var fieldDict index.FieldDict
if len(prefixTerm) > 0 {
fieldDict, err = indexReader.FieldDictPrefix(field, []byte(prefixTerm))
} else {
fieldDict, err = indexReader.FieldDict(field)
}
defer func() {
if cerr := fieldDict.Close(); cerr != nil && err == nil {
err = cerr
}
}()
// enumerate the terms and check against regexp
tfd, err := fieldDict.Next()
for err == nil && tfd != nil {
matchPos := pattern.FindStringIndex(tfd.Term)
if matchPos != nil && matchPos[0] == 0 && matchPos[1] == len(tfd.Term) {
rv.candidates = append(rv.candidates, tfd.Term)
if tooManyClauses(len(rv.candidates)) {
return rv, tooManyClausesErr(field, len(rv.candidates))
}
}
tfd, err = fieldDict.Next()
}
rv.bytesRead = fieldDict.BytesRead()
return rv, err
}
================================================
FILE: search/searcher/search_regexp_test.go
================================================
// Copyright (c) 2015 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"encoding/binary"
"fmt"
"os"
"regexp"
"testing"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestRegexpSearchUpsideDown runs the shared regexp search checks against an
// upside_down two-document index, building searchers with searcherMaker
// (a pre-compiled regexp passed to NewRegexpSearcher).
func TestRegexpSearchUpsideDown(t *testing.T) {
twoDocIndex := initTwoDocUpsideDown()
testRegexpSearch(t, twoDocIndex, internalIDMakerUpsideDown, searcherMaker)
_ = twoDocIndex.Close()
}
// TestRegexpStringSearchUpsideDown runs the shared regexp search checks
// against an upside_down two-document index, building searchers with
// searcherStringMaker (the pattern string passed to NewRegexpStringSearcher).
func TestRegexpStringSearchUpsideDown(t *testing.T) {
twoDocIndex := initTwoDocUpsideDown()
testRegexpSearch(t, twoDocIndex, internalIDMakerUpsideDown, searcherStringMaker)
_ = twoDocIndex.Close()
}
// TestRegexpSearchScorch runs the shared regexp searcher checks against the
// index returned by initTwoDocScorch in a temporary directory, building
// searchers from a precompiled pattern (searcherMaker).
func TestRegexpSearchScorch(t *testing.T) {
	dir, err := os.MkdirTemp("", "scorchTwoDoc")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(dir)
	}()
	twoDocIndex := initTwoDocScorch(dir)
	// Registered after the directory cleanup so it runs first: the index is
	// closed before its directory is removed, even if the helper calls t.Fatal.
	defer func() {
		_ = twoDocIndex.Close()
	}()
	testRegexpSearch(t, twoDocIndex, internalIDMakerScorch, searcherMaker)
}
// TestRegexpStringSearchScorch runs the shared regexp searcher checks against
// the index returned by initTwoDocScorch in a temporary directory, building
// searchers from the pattern string (searcherStringMaker).
func TestRegexpStringSearchScorch(t *testing.T) {
	dir, err := os.MkdirTemp("", "scorchTwoDoc")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(dir)
	}()
	twoDocIndex := initTwoDocScorch(dir)
	// Registered after the directory cleanup so it runs first: the index is
	// closed before its directory is removed, even if the helper calls t.Fatal.
	defer func() {
		_ = twoDocIndex.Close()
	}()
	testRegexpSearch(t, twoDocIndex, internalIDMakerScorch, searcherStringMaker)
}
// internalIDMakerUpsideDown converts id to an internal ID made of its decimal
// text representation.
func internalIDMakerUpsideDown(id int) index.IndexInternalID {
	decimal := fmt.Sprint(id)
	return index.IndexInternalID(decimal)
}
// internalIDMakerScorch converts id to an internal ID made of its 8-byte
// big-endian encoding.
func internalIDMakerScorch(id int) index.IndexInternalID {
	var encoded [8]byte
	binary.BigEndian.PutUint64(encoded[:], uint64(id))
	return index.IndexInternalID(encoded[:])
}
// searcherMaker compiles re and returns a regexp searcher over field built
// with NewRegexpSearcher (boost 1.0, explanations enabled). Any failure ends
// the test via t.Fatal.
func searcherMaker(t *testing.T, ir index.IndexReader, re, field string) search.Searcher {
	compiled, compileErr := regexp.Compile(re)
	if compileErr != nil {
		t.Fatal(compileErr)
	}

	opts := search.SearcherOptions{Explain: true}
	s, buildErr := NewRegexpSearcher(context.TODO(), ir, compiled, field, 1.0, opts)
	if buildErr != nil {
		t.Fatal(buildErr)
	}
	return s
}
// searcherStringMaker returns a regexp searcher over field built from the
// uncompiled pattern string with NewRegexpStringSearcher (boost 1.0,
// explanations enabled). Any failure ends the test via t.Fatal.
func searcherStringMaker(t *testing.T, ir index.IndexReader, re, field string) search.Searcher {
	opts := search.SearcherOptions{Explain: true}
	s, buildErr := NewRegexpStringSearcher(context.TODO(), ir, re, field, 1.0, opts)
	if buildErr != nil {
		t.Fatal(buildErr)
	}
	return s
}
// testRegexpSearch is the shared body of the regexp searcher tests.
//
// It opens a reader on twoDocIndex, builds one searcher for "ma.*" on the
// "name" field and one for "co.*" on the "desc" field using searcherMaker,
// then iterates each searcher and checks that exactly the expected external
// document IDs are returned with the expected scores.
//
// internalIDMaker is accepted for signature compatibility with the callers
// but is not used by the checks below.
func testRegexpSearch(t *testing.T, twoDocIndex index.Index,
	internalIDMaker func(int) index.IndexInternalID,
	searcherMaker func(t *testing.T, ir index.IndexReader, re, field string) search.Searcher,
) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// Everything below needs the reader, so stop instead of continuing
		// with a reader that failed to open.
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	regexpSearcher := searcherMaker(t, twoDocIndexReader, "ma.*", "name")
	regexpSearcherCo := searcherMaker(t, twoDocIndexReader, "co.*", "desc")

	tests := []struct {
		searcher search.Searcher
		id2score map[string]float64
	}{
		{
			searcher: regexpSearcher,
			id2score: map[string]float64{
				"1": 1.916290731874155,
			},
		},
		{
			searcher: regexpSearcherCo,
			id2score: map[string]float64{
				"2": 0.33875554280828685,
				"3": 0.33875554280828685,
			},
		},
	}

	for testIndex, test := range tests {
		// Bind this iteration's searcher to its own variable so the deferred
		// close always targets it, regardless of how the toolchain scopes
		// range variables.
		searcher := test.searcher
		defer func() {
			err := searcher.Close()
			if err != nil {
				t.Fatal(err)
			}
		}()

		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		i := 0
		for err == nil && next != nil {
			exID, exErr := twoDocIndexReader.ExternalID(next.IndexInternalID)
			if exErr != nil {
				t.Fatalf("test %d, error resolving external ID: %v", testIndex, exErr)
			}
			if score, ok := test.id2score[exID]; !ok {
				t.Errorf("test %d, found unexpected docID = %v, next = %v", testIndex, exID, next)
			} else if next.Score != score {
				t.Errorf("test %d, expected result %d to have score %v got %v,next: %#v",
					testIndex, i, score, next.Score, next)
				t.Logf("scoring explanation: %s", next.Expl)
			}
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
			i++
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v for test %d", err, testIndex)
		}
		if len(test.id2score) != i {
			t.Errorf("expected %d results got %d for test %d", len(test.id2score), i, testIndex)
		}
	}
}
================================================
FILE: search/searcher/search_term.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"fmt"
"math"
"reflect"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/scorer"
"github.com/blevesearch/bleve/v2/size"
index "github.com/blevesearch/bleve_index_api"
)
// reflectStaticSizeTermSearcher caches the static (reflect-reported) size of
// the TermSearcher struct; it is filled in once by init.
var reflectStaticSizeTermSearcher int

func init() {
	termSearcherType := reflect.TypeOf(TermSearcher{})
	reflectStaticSizeTermSearcher = int(termSearcherType.Size())
}
// TermSearcher is a searcher over the matches of one term in one field: it
// pulls matches from a term field reader and scores them with a term query
// scorer.
type TermSearcher struct {
// indexReader is the index reader the searcher was created with
indexReader index.IndexReader
// reader yields the term's matches for Next/Advance and reports Count
reader index.TermFieldReader
// scorer converts each term match into a scored document match
scorer *scorer.TermQueryScorer
// tfd is reset and handed to the reader on every Next/Advance call
tfd index.TermFieldDoc
}
// NewTermSearcher builds a searcher for a string term in the given field.
//
// If ctx is non-nil and carries no query type yet (see isTermQuery), the
// query type is recorded as search.Term before delegating to
// NewTermSearcherBytes with the term converted to bytes.
func NewTermSearcher(ctx context.Context, indexReader index.IndexReader,
	term string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	searchCtx := ctx
	if isTermQuery(searchCtx) {
		searchCtx = context.WithValue(searchCtx, search.QueryTypeKey, search.Term)
	}
	termBytes := []byte(term)
	return NewTermSearcherBytes(searchCtx, indexReader, termBytes, field, boost, options)
}
// NewTermSearcherBytes builds a searcher for the exact term bytes in field.
//
// When ctx is non-nil and carries a search.FieldTermSynonymMap containing an
// entry for this field and term, the search is delegated to
// NewSynonymSearcher with that entry's synonyms. Otherwise a term field
// reader is opened and wrapped in a TermSearcher; the two flags passed to
// TermFieldReader before options.IncludeTermVectors are both false when
// options.Score is "none" and true otherwise.
//
// On failure the returned searcher is an untyped nil, so callers may safely
// compare it against nil.
func NewTermSearcherBytes(ctx context.Context, indexReader index.IndexReader,
	term []byte, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	if ctx != nil {
		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
			if ts, exists := fts[field]; exists {
				if s, found := ts[string(term)]; found {
					return NewSynonymSearcher(ctx, indexReader, term, s, field, boost, options)
				}
			}
		}
	}

	needFreqNorm := options.Score != "none"
	reader, err := indexReader.TermFieldReader(ctx, term, field, needFreqNorm, needFreqNorm, options.IncludeTermVectors)
	if err != nil {
		return nil, err
	}

	ts, err := newTermSearcherFromReader(ctx, indexReader, reader, term, field, boost, options)
	if err != nil {
		// Return a literal nil: passing the nil *TermSearcher through would
		// produce a non-nil search.Searcher interface, and a caller that
		// nil-checks it before Close would dereference a nil pointer.
		return nil, err
	}
	return ts, nil
}
// tfIDFScoreMetrics returns the statistic used for tf-idf scoring: the
// document count reported by indexReader. On error it returns 0 and the
// error.
func tfIDFScoreMetrics(indexReader index.IndexReader) (uint64, error) {
	docCount, err := indexReader.DocCount()
	if err != nil {
		return 0, err
	}
	// an empty index simply reports a count of zero
	return docCount, nil
}
// bm25ScoreMetrics returns the statistics used for BM25 scoring of field: the
// document count and the average document length, computed as
// ceil(fieldCardinality / documentCount).
//
// When ctx carries a *search.BM25Stats under search.BM25StatsKey, both values
// come from it, and a field missing from its FieldCardinality map is an
// error. Otherwise the document count comes from indexReader, and the field
// cardinality from indexReader if it implements index.BM25Reader (it stays 0
// if not).
//
// With a document count of zero the average is undefined, so (0, 0, nil) is
// returned rather than dividing by zero.
func bm25ScoreMetrics(ctx context.Context, field string,
	indexReader index.IndexReader) (uint64, float64, error) {
	var count uint64
	var fieldCardinality int
	var err error

	// A nil ctx cannot carry stats; treat it like a ctx without them instead
	// of calling a method on a nil interface.
	var bm25Stats *search.BM25Stats
	ok := false
	if ctx != nil {
		bm25Stats, ok = ctx.Value(search.BM25StatsKey).(*search.BM25Stats)
	}

	if !ok {
		count, err = indexReader.DocCount()
		if err != nil {
			return 0, 0, err
		}
		if bm25Reader, ok := indexReader.(index.BM25Reader); ok {
			fieldCardinality, err = bm25Reader.FieldCardinality(field)
			if err != nil {
				return 0, 0, err
			}
		}
	} else {
		count = uint64(bm25Stats.DocCount)
		fieldCardinality, ok = bm25Stats.FieldCardinality[field]
		if !ok {
			return 0, 0, fmt.Errorf("field stat for bm25 not present %s", field)
		}
	}

	if count == 0 {
		// Guard on the divisor alone: a zero count with a non-zero
		// cardinality would otherwise produce an infinite average length.
		return 0, 0, nil
	}
	return count, math.Ceil(float64(fieldCardinality) / float64(count)), nil
}
// newTermSearcherFromReader wraps an already opened term field reader in a
// TermSearcher.
//
// The scoring model is taken from the callback stored in ctx under
// search.GetScoringModelCallbackKey, when present. For index.BM25Scoring the
// statistics come from bm25ScoreMetrics; for every other value (including no
// model at all) tf-idf statistics from tfIDFScoreMetrics are used. If
// gathering statistics fails, reader is closed and the error is returned.
func newTermSearcherFromReader(ctx context.Context, indexReader index.IndexReader,
	reader index.TermFieldReader, term []byte, field string, boost float64,
	options search.SearcherOptions) (*TermSearcher, error) {
	// look up the scoring model, if the context provides one
	var model string
	if ctx != nil {
		getModel, ok := ctx.Value(search.
			GetScoringModelCallbackKey).(search.GetScoringModelCallbackFn)
		if ok {
			model = getModel()
		}
	}

	var (
		docCount  uint64
		avgDocLen float64
		err       error
	)
	if model == index.BM25Scoring {
		docCount, avgDocLen, err = bm25ScoreMetrics(ctx, field, indexReader)
	} else {
		// tf-idf is both the explicit and the fallback choice
		docCount, err = tfIDFScoreMetrics(indexReader)
	}
	if err != nil {
		_ = reader.Close()
		return nil, err
	}

	termScorer := scorer.NewTermQueryScorer(term, field, boost, docCount, reader.Count(), avgDocLen, options)
	searcher := &TermSearcher{
		indexReader: indexReader,
		reader:      reader,
		scorer:      termScorer,
	}
	return searcher, nil
}
// NewSynonymSearcher builds a disjunction over the term itself and each of
// its synonyms in field. The term is searched at the given boost and every
// synonym at half of it; the disjunction is created with a minimum of 0.
//
// If any constituent searcher or the disjunction cannot be created, the
// searchers opened so far are closed and the error is returned.
func NewSynonymSearcher(ctx context.Context, indexReader index.IndexReader, term []byte, synonyms []string, field string, boost float64, options search.SearcherOptions) (search.Searcher, error) {
	wantFreqNorm := options.Score != "none"

	// openTermSearcher opens a plain term searcher for one term at one boost
	openTermSearcher := func(t []byte, b float64) (search.Searcher, error) {
		tfr, err := indexReader.TermFieldReader(ctx, t, field, wantFreqNorm, wantFreqNorm, options.IncludeTermVectors)
		if err != nil {
			return nil, err
		}
		return newTermSearcherFromReader(ctx, indexReader, tfr, t, field, b, options)
	}

	// the term itself comes first; nothing needs cleanup if it fails
	primary, err := openTermSearcher(term, boost)
	if err != nil {
		return nil, err
	}

	// children of the disjunction: the term plus one entry per synonym
	children := make([]search.Searcher, 0, len(synonyms)+1)
	children = append(children, primary)

	// closeChildren releases every searcher opened so far on error paths
	closeChildren := func() {
		for _, child := range children {
			if child != nil {
				_ = child.Close()
			}
		}
	}

	for _, synonym := range synonyms {
		child, err := openTermSearcher([]byte(synonym), boost/2.0)
		if err != nil {
			closeChildren()
			return nil, err
		}
		children = append(children, child)
	}

	disjunction, err := NewDisjunctionSearcher(ctx, indexReader, children, 0, options)
	if err != nil {
		closeChildren()
		return nil, err
	}
	return disjunction, nil
}
// Size returns the sum of the struct's static size, one pointer size, and the
// sizes reported by the reader, the reusable term field doc and the scorer.
func (s *TermSearcher) Size() int {
	total := reflectStaticSizeTermSearcher + size.SizeOfPtr
	total += s.reader.Size()
	total += s.tfd.Size()
	total += s.scorer.Size()
	return total
}
// Count returns the count reported by the underlying term field reader.
func (s *TermSearcher) Count() uint64 {
	n := s.reader.Count()
	return n
}
// Weight returns the weight reported by the scorer.
func (s *TermSearcher) Weight() float64 {
	w := s.scorer.Weight()
	return w
}
// SetQueryNorm forwards the query norm to the scorer.
func (s *TermSearcher) SetQueryNorm(qnorm float64) {
	termScorer := s.scorer
	termScorer.SetQueryNorm(qnorm)
}
// Next returns the scored match for the reader's next entry. It returns
// (nil, nil) once the reader has no more entries and (nil, err) if the reader
// fails.
func (s *TermSearcher) Next(ctx *search.SearchContext) (*search.DocumentMatch, error) {
	// the reusable term field doc is reset and refilled on every call
	termMatch, err := s.reader.Next(s.tfd.Reset())
	if err != nil || termMatch == nil {
		return nil, err
	}
	return s.scorer.Score(ctx, termMatch), nil
}
// Advance asks the reader to advance to ID and returns the scored match for
// the entry it lands on. It returns (nil, nil) when the reader yields no
// entry and (nil, err) if the reader fails.
func (s *TermSearcher) Advance(ctx *search.SearchContext, ID index.IndexInternalID) (*search.DocumentMatch, error) {
	// the reusable term field doc is reset and refilled on every call
	termMatch, err := s.reader.Advance(ID, s.tfd.Reset())
	if err != nil || termMatch == nil {
		return nil, err
	}
	return s.scorer.Score(ctx, termMatch), nil
}
// Close closes the underlying term field reader and returns its error.
func (s *TermSearcher) Close() error {
	closeErr := s.reader.Close()
	return closeErr
}
// Min always returns 0 for a term searcher.
func (s *TermSearcher) Min() int {
	const termSearcherMin = 0
	return termSearcherMin
}
// DocumentMatchPoolSize always returns 1 for a term searcher.
func (s *TermSearcher) DocumentMatchPoolSize() int {
	const termSearcherPoolSize = 1
	return termSearcherPoolSize
}
// Optimize forwards the request to the underlying reader when it implements
// index.Optimizable; otherwise it returns (nil, nil).
func (s *TermSearcher) Optimize(kind string, octx index.OptimizableContext) (
	index.OptimizableContext, error) {
	if optimizable, ok := s.reader.(index.Optimizable); ok {
		return optimizable.Optimize(kind, octx)
	}
	return nil, nil
}
// isTermQuery reports whether ctx is non-nil and holds no string value under
// search.QueryTypeKey yet. A query type that is already present would have
// been set at a non term searcher level; a nil ctx never gets a query type.
func isTermQuery(ctx context.Context) bool {
	if ctx == nil {
		return false
	}
	_, alreadySet := ctx.Value(search.QueryTypeKey).(string)
	return !alreadySet
}
================================================
FILE: search/searcher/search_term_prefix.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"strings"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewTermPrefixSearcher builds a searcher matching every term in field that
// starts with prefix.
//
// Candidate terms come from the field dictionary restricted to the prefix
// and, when ctx carries a search.FieldTermSynonymMap with an entry for field,
// from that entry's keys that start with prefix. Each term is added at most
// once. If the number of terms trips tooManyClauses, the corresponding error
// is returned.
//
// When no term was found, or the only term is the prefix itself, a plain term
// searcher for prefix is returned; otherwise a multi-term searcher over all
// collected terms.
func NewTermPrefixSearcher(ctx context.Context, indexReader index.IndexReader, prefix string,
	field string, boost float64, options search.SearcherOptions) (
	search.Searcher, error) {
	// find the terms with this prefix
	fieldDict, err := indexReader.FieldDictPrefix(field, []byte(prefix))
	if err != nil {
		return nil, err
	}
	// NOTE(review): the results are unnamed, so a Close error stored in err
	// here is not what the caller receives.
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	var terms []string
	termSet := make(map[string]struct{})
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		if _, exists := termSet[tfd.Term]; !exists {
			termSet[tfd.Term] = struct{}{}
			terms = append(terms, tfd.Term)
			if tooManyClauses(len(terms)) {
				return nil, tooManyClausesErr(field, len(terms))
			}
		}
		// Advance unconditionally: stepping only after a newly seen term
		// would loop forever if the dictionary ever repeated a term.
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	if ctx != nil {
		reportIOStats(ctx, fieldDict.BytesRead())
		search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())

		// add synonym-map terms for this field that share the prefix and
		// were not already found in the dictionary
		if fts, ok := ctx.Value(search.FieldTermSynonymMapKey).(search.FieldTermSynonymMap); ok {
			if ts, exists := fts[field]; exists {
				for term := range ts {
					if _, exists := termSet[term]; exists {
						continue
					}
					if strings.HasPrefix(term, prefix) {
						termSet[term] = struct{}{}
						terms = append(terms, term)
						if tooManyClauses(len(terms)) {
							return nil, tooManyClausesErr(field, len(terms))
						}
					}
				}
			}
		}
	}

	// check if the terms are empty or have one term which is the prefix itself
	if len(terms) == 0 || (len(terms) == 1 && terms[0] == prefix) {
		return NewTermSearcher(ctx, indexReader, prefix, field, boost, options)
	}
	return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
}
================================================
FILE: search/searcher/search_term_range.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// NewTermRangeSearcher builds a searcher matching every term in field that
// lies between min and max.
//
// A nil inclusiveMin defaults to true and a nil inclusiveMax defaults to
// false. A nil min is passed to the dictionary as an empty lower bound, and a
// nil max is passed through as nil. When a bound is exclusive and was
// actually supplied, a term equal to that bound is dropped from the
// corresponding end of the term list.
//
// If no terms remain after the dictionary scan or after dropping an exclusive
// minimum, a match-none searcher is returned; otherwise a multi-term searcher
// over the remaining terms.
func NewTermRangeSearcher(ctx context.Context, indexReader index.IndexReader,
	min, max []byte, inclusiveMin, inclusiveMax *bool, field string,
	boost float64, options search.SearcherOptions) (search.Searcher, error) {
	if inclusiveMin == nil {
		defaultInclusiveMin := true
		inclusiveMin = &defaultInclusiveMin
	}
	if inclusiveMax == nil {
		defaultInclusiveMax := false
		inclusiveMax = &defaultInclusiveMax
	}

	// Remember whether a lower bound was supplied before normalizing it;
	// afterwards min is never nil, so the exclusive-min check below could not
	// otherwise tell "no bound" from an empty one.
	minSpecified := min != nil
	if min == nil {
		min = []byte{}
	}

	var rangeMax []byte
	if max != nil {
		// the term dictionary range end has an unfortunate implementation,
		// so the bound handed to it carries one extra trailing zero byte.
		// Build it in a fresh slice: appending to max in place could write
		// into spare capacity of the caller's backing array.
		rangeMax = make([]byte, len(max)+1)
		copy(rangeMax, max)
	}

	// find the terms in this range
	fieldDict, err := indexReader.FieldDictRange(field, min, rangeMax)
	if err != nil {
		return nil, err
	}
	// NOTE(review): the results are unnamed, so a Close error stored in err
	// here is not what the caller receives.
	defer func() {
		if cerr := fieldDict.Close(); cerr != nil && err == nil {
			err = cerr
		}
	}()

	var terms []string
	tfd, err := fieldDict.Next()
	for err == nil && tfd != nil {
		terms = append(terms, tfd.Term)
		tfd, err = fieldDict.Next()
	}
	if err != nil {
		return nil, err
	}

	if len(terms) < 1 {
		return NewMatchNoneSearcher(indexReader)
	}

	// if our term list included the min, it would be the first item
	if !*inclusiveMin && minSpecified && string(min) == terms[0] {
		terms = terms[1:]
		// check again, as we might have removed only entry
		if len(terms) < 1 {
			return NewMatchNoneSearcher(indexReader)
		}
	}

	// if our term list included the max, it would be the last item
	if !*inclusiveMax && max != nil && string(max) == terms[len(terms)-1] {
		terms = terms[:len(terms)-1]
	}

	if ctx != nil {
		reportIOStats(ctx, fieldDict.BytesRead())
		search.RecordSearchCost(ctx, search.AddM, fieldDict.BytesRead())
	}

	return NewMultiTermSearcher(ctx, indexReader, terms, field, boost, options, true)
}
================================================
FILE: search/searcher/search_term_range_test.go
================================================
// Copyright (c) 2017 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"os"
"reflect"
"sort"
"testing"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/search"
)
// TestTermRangeSearch checks NewTermRangeSearcher against the shared
// twoDocIndex for a table of min/max bounds and inclusivity flags, comparing
// the internal IDs returned by each searcher with the expected list.
func TestTermRangeSearch(t *testing.T) {
	twoDocIndexReader, err := twoDocIndex.Reader()
	if err != nil {
		// every case needs the reader, so stop instead of continuing with a
		// reader that failed to open
		t.Fatal(err)
	}
	defer func() {
		err := twoDocIndexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	tests := []struct {
		min          []byte
		max          []byte
		inclusiveMin bool
		inclusiveMax bool
		field        string
		want         []string
	}{
		{
			min:          []byte("marty"),
			max:          []byte("marty"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1"},
		},
		{
			min:          []byte("marty"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "4"},
		},
		// inclusive max false should exclude ravi
		{
			min:          []byte("marty"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: false,
			want:         []string{"1"},
		},
		// inclusive max false should remove last/only item
		{
			min:          []byte("martz"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: false,
			want:         nil,
		},
		// inclusive min false should remove marty
		{
			min:          []byte("marty"),
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: true,
			want:         []string{"4"},
		},
		// inclusive min false should remove first/only item
		{
			min:          []byte("marty"),
			max:          []byte("rav"),
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: true,
			want:         nil,
		},
		// max nil sees everything after marty
		{
			min:          []byte("marty"),
			max:          nil,
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "2", "4"},
		},
		// min nil sees everything before ravi
		{
			min:          nil,
			max:          []byte("ravi"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "3", "4", "5"},
		},
		// min and max nil sees everything
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max nil sees everything, even with inclusiveMin false
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: true,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max nil sees everything, even with inclusiveMax false
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: false,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max nil sees everything, even with both false
		{
			min:          nil,
			max:          nil,
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: false,
			want:         []string{"1", "2", "3", "4", "5"},
		},
		// min and max non-nil, but match 0 terms
		{
			min:          []byte("martz"),
			max:          []byte("rav"),
			field:        "name",
			inclusiveMin: true,
			inclusiveMax: true,
			want:         nil,
		},
		// min and max same (and term exists), both exclusive
		{
			min:          []byte("marty"),
			max:          []byte("marty"),
			field:        "name",
			inclusiveMin: false,
			inclusiveMax: false,
			want:         nil,
		},
	}

	for _, test := range tests {
		searcher, err := NewTermRangeSearcher(context.TODO(), twoDocIndexReader, test.min, test.max,
			&test.inclusiveMin, &test.inclusiveMax, test.field, 1.0, search.SearcherOptions{Explain: true})
		if err != nil {
			t.Fatal(err)
		}

		var got []string
		ctx := &search.SearchContext{
			DocumentMatchPool: search.NewDocumentMatchPool(
				searcher.DocumentMatchPoolSize(), 0),
		}
		next, err := searcher.Next(ctx)
		for err == nil && next != nil {
			got = append(got, string(next.IndexInternalID))
			ctx.DocumentMatchPool.Put(next)
			next, err = searcher.Next(ctx)
		}
		if err != nil {
			t.Fatalf("error iterating searcher: %v", err)
		}
		// release this case's searcher before building the next one
		if err := searcher.Close(); err != nil {
			t.Fatal(err)
		}

		if !reflect.DeepEqual(got, test.want) {
			t.Errorf("expected: %v, got %v for test %#v", test.want, got, test)
		}
	}
}
// TestTermRangeSearchTooManyTerms lowers DisjunctionMaxClauseCount to 2 and
// runs a term range search on a scorch index whose range covers more terms
// than that limit. It checks the returned external IDs, the number of term
// searchers the index reports as started, and that every started term
// searcher was also finished.
func TestTermRangeSearchTooManyTerms(t *testing.T) {
	dir, err := os.MkdirTemp("", "scorchTwoDoc")
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		_ = os.RemoveAll(dir)
	}()

	scorchIndex := initTwoDocScorch(dir)
	// registered after the directory cleanup so the index is closed before
	// its directory is removed
	defer func() {
		_ = scorchIndex.Close()
	}()

	// use lower limit for this test
	origLimit := DisjunctionMaxClauseCount
	DisjunctionMaxClauseCount = 2
	defer func() {
		DisjunctionMaxClauseCount = origLimit
	}()

	scorchReader, err := scorchIndex.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := scorchReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	want := []string{"1", "3", "4", "5"}

	truth := true
	searcher, err := NewTermRangeSearcher(context.TODO(), scorchReader, []byte("bobert"), []byte("ravi"),
		&truth, &truth, "name", 1.0, search.SearcherOptions{Score: "none", IncludeTermVectors: false})
	if err != nil {
		t.Fatal(err)
	}

	var got []string
	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(
			searcher.DocumentMatchPoolSize(), 0),
	}
	next, err := searcher.Next(ctx)
	for err == nil && next != nil {
		// A distinct error name keeps err bound to the iteration error, so a
		// failing searcher.Next is reported by the check after the loop.
		extID, extErr := scorchReader.ExternalID(next.IndexInternalID)
		if extErr != nil {
			t.Fatal(extErr)
		}
		got = append(got, extID)
		ctx.DocumentMatchPool.Put(next)
		next, err = searcher.Next(ctx)
	}
	if err != nil {
		t.Fatalf("error iterating searcher: %v", err)
	}
	err = searcher.Close()
	if err != nil {
		t.Fatal(err)
	}

	// check that the expected number of term searchers were started
	// 6 = 4 original terms, 1 optimized after first round, then final searcher
	// from the last round
	statsMap := scorchIndex.(*scorch.Scorch).StatsMap()
	if statsMap["term_searchers_started"].(uint64) != 6 {
		t.Errorf("expected 6 term searchers started, got %d", statsMap["term_searchers_started"])
	}

	// check that all started searchers were closed
	if statsMap["term_searchers_started"] != statsMap["term_searchers_finished"] {
		t.Errorf("expected all term searchers closed, %d started %d closed",
			statsMap["term_searchers_started"], statsMap["term_searchers_finished"])
	}

	sort.Strings(got)
	if !reflect.DeepEqual(got, want) {
		t.Errorf("expected: %#v, got %#v", want, got)
	}
}
================================================
FILE: search/searcher/search_term_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package searcher
import (
"context"
"math"
"testing"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/gtreap"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestTermSearcher indexes nine documents ("a" through "i") whose "desc"
// field is "beer" plus one document ("j") whose "title" field is "cat" into
// an in-memory upsidedown index, then checks a term searcher for "beer" on
// "desc": its weight, its count, and the results of Next and Advance,
// including stepping past the end.
func TestTermSearcher(t *testing.T) {
	queryTerm := "beer"
	queryField := "desc"
	queryBoost := 3.0
	queryExplain := search.SearcherOptions{Explain: true}

	analysisQueue := index.NewAnalysisQueue(1)
	i, err := upsidedown.NewUpsideDownCouch(
		gtreap.Name,
		map[string]interface{}{
			"path": "",
		},
		analysisQueue)
	if err != nil {
		t.Fatal(err)
	}
	err = i.Open()
	if err != nil {
		t.Fatal(err)
	}

	// documents that contain the query term in the queried field
	for _, id := range []string{"a", "b", "c", "d", "e", "f", "g", "h", "i"} {
		doc := document.NewDocument(id)
		doc.AddField(document.NewTextField("desc", []uint64{}, []byte("beer")))
		if err := i.Update(doc); err != nil {
			t.Fatal(err)
		}
	}
	// one document that must not match: different field, different term
	doc := document.NewDocument("j")
	doc.AddField(document.NewTextField("title", []uint64{}, []byte("cat")))
	err = i.Update(doc)
	if err != nil {
		t.Fatal(err)
	}

	indexReader, err := i.Reader()
	if err != nil {
		// everything below needs the reader
		t.Fatal(err)
	}
	defer func() {
		err := indexReader.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	searcher, err := NewTermSearcher(context.TODO(), indexReader, queryTerm, queryField, queryBoost, queryExplain)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := searcher.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	searcher.SetQueryNorm(2.0)
	docCount, err := indexReader.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	idf := 1.0 + math.Log(float64(docCount)/float64(searcher.Count()+1.0))
	expectedQueryWeight := 3 * idf * 3 * idf
	if expectedQueryWeight != searcher.Weight() {
		t.Errorf("expected weight %v got %v", expectedQueryWeight, searcher.Weight())
	}

	if searcher.Count() != 9 {
		t.Errorf("expected count of 9, got %d", searcher.Count())
	}

	ctx := &search.SearchContext{
		DocumentMatchPool: search.NewDocumentMatchPool(1, 0),
	}
	docMatch, err := searcher.Next(ctx)
	if err != nil {
		t.Fatalf("expected result, got %v", err)
	}
	if docMatch == nil {
		// fail clearly instead of dereferencing a nil match below
		t.Fatal("expected result ID to be 'a', got no match")
	}
	if !docMatch.IndexInternalID.Equals(index.IndexInternalID("a")) {
		t.Errorf("expected result ID to be 'a', got '%s'", docMatch.IndexInternalID)
	}
	ctx.DocumentMatchPool.Put(docMatch)
	docMatch, err = searcher.Advance(ctx, index.IndexInternalID("c"))
	if err != nil {
		t.Fatalf("expected result, got %v", err)
	}
	if docMatch == nil {
		// fail clearly instead of dereferencing a nil match below
		t.Fatal("expected result ID to be 'c', got no match")
	}
	if !docMatch.IndexInternalID.Equals(index.IndexInternalID("c")) {
		t.Errorf("expected result ID to be 'c' got '%s'", docMatch.IndexInternalID)
	}

	// try advancing past end
	ctx.DocumentMatchPool.Put(docMatch)
	docMatch, err = searcher.Advance(ctx, index.IndexInternalID("z"))
	if err != nil {
		t.Fatal(err)
	}
	if docMatch != nil {
		t.Errorf("expected nil, got %v", docMatch)
	}

	// try pushing next past end
	ctx.DocumentMatchPool.Put(docMatch)
	docMatch, err = searcher.Next(ctx)
	if err != nil {
		t.Fatal(err)
	}
	if docMatch != nil {
		t.Errorf("expected nil, got %v", docMatch)
	}
}
================================================
FILE: search/sort.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"bytes"
"encoding/json"
"fmt"
"math"
"sort"
"strconv"
"strings"
"time"
"unicode/utf8"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/numeric"
"github.com/blevesearch/bleve/v2/util"
)
var (
// HighTerm is utf8.MaxRune repeated three times.
// NOTE(review): presumably a sentinel meant to sort after real terms — confirm against its users.
HighTerm = strings.Repeat(string(utf8.MaxRune), 3)
// LowTerm is a string holding a single 0x00 byte.
// NOTE(review): presumably a sentinel meant to sort before real terms — confirm against its users.
LowTerm = string([]byte{0x00})
)
// SearchSort is a single sort criterion. A SortOrder is a list of these and
// drives them through the methods below.
type SearchSort interface {
// UpdateVisitor is handed a field name and a term; SortOrder.UpdateVisitor
// forwards every such pair to each criterion.
UpdateVisitor(field string, term []byte)
// Value returns the sort key for the document match; SortOrder.Value
// appends it to the match's Sort slice.
Value(a *DocumentMatch) string
// DecodeValue converts a key produced by Value; SortOrder.Value appends
// the result to the match's DecodedSort slice.
DecodeValue(value string) string
// Descending reports the direction; SortOrder.Compare negates the
// comparison result for criteria that report true.
Descending() bool
// RequiresDocID is aggregated by SortOrder.RequiresDocID.
RequiresDocID() bool
// RequiresScoring is aggregated by SortOrder.RequiresScore; criteria that
// report true are compared by match Score in SortOrder.Compare.
RequiresScoring() bool
// RequiresFields returns field names; SortOrder.RequiredFields
// concatenates them across all criteria.
RequiresFields() []string
// Reverse is invoked on every criterion by SortOrder.Reverse.
// NOTE(review): presumably flips the sort direction — confirm in the implementations.
Reverse()
// Copy returns a SearchSort; SortOrder.Copy uses it for each element.
Copy() SearchSort
}
// ParseSearchSortObj builds a SearchSort from its object form.
//
// "by" is required and selects the kind: "id", "score", "geo_distance" or
// "field". "desc" makes the sort descending when it is the boolean true.
//
// For "geo_distance", "field" and a parseable "location" are required and an
// optional "unit" string sets the distance unit. For "field", "field" is
// required and the optional strings "type" (auto, string, number, date),
// "mode" (default, min, max) and "missing" (first, last) are honored; any
// other value for those is an error.
func ParseSearchSortObj(input map[string]interface{}) (SearchSort, error) {
	// a missing or non-boolean "desc" leaves the sort ascending
	descending, _ := input["desc"].(bool)

	by, ok := input["by"].(string)
	if !ok {
		return nil, fmt.Errorf("search sort must specify by")
	}

	switch by {
	case "id":
		return &SortDocID{Desc: descending}, nil

	case "score":
		return &SortScore{Desc: descending}, nil

	case "geo_distance":
		fieldName, ok := input["field"].(string)
		if !ok {
			return nil, fmt.Errorf("search sort mode geo_distance must specify field")
		}
		lon, lat, located := geo.ExtractGeoPoint(input["location"])
		if !located {
			return nil, fmt.Errorf("unable to parse geo_distance location")
		}
		geoSort := &SortGeoDistance{
			Field:    fieldName,
			Desc:     descending,
			Lon:      lon,
			Lat:      lat,
			unitMult: 1.0,
		}
		if unitName, ok := input["unit"].(string); ok {
			mult, err := geo.ParseDistanceUnit(unitName)
			if err != nil {
				return nil, err
			}
			geoSort.unitMult = mult
			geoSort.Unit = unitName
		}
		return geoSort, nil

	case "field":
		fieldName, ok := input["field"].(string)
		if !ok {
			return nil, fmt.Errorf("search sort mode field must specify field")
		}
		fieldSort := &SortField{
			Field: fieldName,
			Desc:  descending,
		}

		if typ, ok := input["type"].(string); ok {
			switch typ {
			case "auto":
				fieldSort.Type = SortFieldAuto
			case "string":
				fieldSort.Type = SortFieldAsString
			case "number":
				fieldSort.Type = SortFieldAsNumber
			case "date":
				fieldSort.Type = SortFieldAsDate
			default:
				return nil, fmt.Errorf("unknown sort field type: %s", typ)
			}
		}

		if mode, ok := input["mode"].(string); ok {
			switch mode {
			case "default":
				fieldSort.Mode = SortFieldDefault
			case "min":
				fieldSort.Mode = SortFieldMin
			case "max":
				fieldSort.Mode = SortFieldMax
			default:
				return nil, fmt.Errorf("unknown sort field mode: %s", mode)
			}
		}

		if missing, ok := input["missing"].(string); ok {
			switch missing {
			case "first":
				fieldSort.Missing = SortFieldMissingFirst
			case "last":
				fieldSort.Missing = SortFieldMissingLast
			default:
				return nil, fmt.Errorf("unknown sort field missing: %s", missing)
			}
		}
		return fieldSort, nil
	}

	return nil, fmt.Errorf("unknown search sort by: %s", by)
}
// ParseSearchSortString builds a SearchSort from its compact string form.
//
// A leading "-" selects descending order and a leading "+" is accepted for
// ascending; either sign is stripped. The remainder "_id" sorts by document
// ID, "_score" by score, and anything else is taken as a field name.
func ParseSearchSortString(input string) SearchSort {
	descending := strings.HasPrefix(input, "-")
	if descending || strings.HasPrefix(input, "+") {
		input = input[1:]
	}

	switch input {
	case "_id":
		return &SortDocID{Desc: descending}
	case "_score":
		return &SortScore{Desc: descending}
	default:
		return &SortField{Field: input, Desc: descending}
	}
}
// ParseSearchSortJSON builds a SearchSort from raw JSON. The input is first
// tried as a JSON string (handled by ParseSearchSortString); if that fails it
// is decoded as an object and handled by ParseSearchSortObj. The error of the
// object decode is returned when both attempts fail.
func ParseSearchSortJSON(input json.RawMessage) (SearchSort, error) {
	var asString string
	if err := util.UnmarshalJSON(input, &asString); err == nil {
		return ParseSearchSortString(asString), nil
	}

	// not a string, fall back to the object form
	var asObject map[string]interface{}
	if err := util.UnmarshalJSON(input, &asObject); err != nil {
		return nil, err
	}
	return ParseSearchSortObj(asObject)
}
// ParseSortOrderStrings converts each compact sort string with
// ParseSearchSortString and returns the results in input order.
func ParseSortOrderStrings(in []string) SortOrder {
	order := make(SortOrder, len(in))
	for idx := range in {
		order[idx] = ParseSearchSortString(in[idx])
	}
	return order
}
// ParseSortOrderJSON converts each raw JSON sort entry with
// ParseSearchSortJSON and returns the results in input order. The first entry
// that fails to parse aborts the conversion with its error.
func ParseSortOrderJSON(in []json.RawMessage) (SortOrder, error) {
	order := make(SortOrder, len(in))
	for idx := range in {
		parsed, err := ParseSearchSortJSON(in[idx])
		if err != nil {
			return nil, err
		}
		order[idx] = parsed
	}
	return order, nil
}
// SortOrder is an ordered list of sort criteria; Compare consults them in
// order until one of them tells two document matches apart.
type SortOrder []SearchSort
// Value computes the sort key of doc for every criterion, appending each key
// to doc.Sort and its decoded form to doc.DecodedSort.
func (so SortOrder) Value(doc *DocumentMatch) {
	for idx := range so {
		criterion := so[idx]
		key := criterion.Value(doc)
		doc.Sort = append(doc.Sort, key)
		decoded := criterion.DecodeValue(key)
		doc.DecodedSort = append(doc.DecodedSort, decoded)
	}
}
// UpdateVisitor forwards the field/term pair to every criterion.
func (so SortOrder) UpdateVisitor(field string, term []byte) {
	for idx := range so {
		so[idx].UpdateVisitor(field, term)
	}
}
// Copy returns a new SortOrder of the same length holding the Copy of every
// criterion.
func (so SortOrder) Copy() SortOrder {
	duplicate := make(SortOrder, 0, len(so))
	for idx := range so {
		duplicate = append(duplicate, so[idx].Copy())
	}
	return duplicate
}
// Compare orders two document matches according to this sort order and
// returns a negative, zero or positive value.
//
// Criteria are consulted in order until one differs. Where cachedScoring[x]
// is set the matches' Score values are compared directly; otherwise their
// precomputed Sort[x] strings are. The result is negated where cachedDesc[x]
// is set. If no criterion differs, the matches are ordered by HitNumber.
func (so SortOrder) Compare(cachedScoring, cachedDesc []bool, i, j *DocumentMatch) int {
	for x := range so {
		var c int
		if cachedScoring[x] {
			switch {
			case i.Score < j.Score:
				c = -1
			case i.Score > j.Score:
				c = 1
			}
		} else {
			left, right := i.Sort[x], j.Sort[x]
			switch {
			case left < right:
				c = -1
			case left > right:
				c = 1
			}
		}

		// the first criterion that tells the matches apart decides
		if c != 0 {
			if cachedDesc[x] {
				return -c
			}
			return c
		}
	}

	// all criteria tie: fall back to the order in which the hits were seen
	switch {
	case i.HitNumber == j.HitNumber:
		return 0
	case i.HitNumber > j.HitNumber:
		return 1
	}
	return -1
}
// RequiresScore reports whether at least one criterion needs scoring.
func (so SortOrder) RequiresScore() bool {
	for idx := range so {
		if so[idx].RequiresScoring() {
			return true
		}
	}
	return false
}
// RequiresDocID reports whether at least one criterion needs the document ID
// to be loaded.
func (so SortOrder) RequiresDocID() bool {
	for idx := range so {
		if so[idx].RequiresDocID() {
			return true
		}
	}
	return false
}
// RequiredFields concatenates, in order, the fields required by every
// criterion. The result is nil when no criterion requires a field.
func (so SortOrder) RequiredFields() []string {
	var fields []string
	for idx := range so {
		fields = append(fields, so[idx].RequiresFields()...)
	}
	return fields
}
// CacheIsScore returns, per criterion, whether it requires scoring. The
// result has the shape Compare expects for its cachedScoring argument.
func (so SortOrder) CacheIsScore() []bool {
	flags := make([]bool, len(so))
	for idx, criterion := range so {
		flags[idx] = criterion.RequiresScoring()
	}
	return flags
}
// CacheDescending returns, per criterion, whether it sorts descending. The
// result has the shape Compare expects for its cachedDesc argument.
func (so SortOrder) CacheDescending() []bool {
	flags := make([]bool, len(so))
	for idx, criterion := range so {
		flags[idx] = criterion.Descending()
	}
	return flags
}
// Reverse calls Reverse on every criterion, mutating them in place.
func (so SortOrder) Reverse() {
	for idx := range so {
		so[idx].Reverse()
	}
}
// SortFieldType lets you control some internal sort behavior
// normally leaving this to the zero-value of SortFieldAuto is fine
type SortFieldType int

const (
	// SortFieldAuto applies heuristics in an attempt to automatically sort
	// correctly: when every term is a valid prefix coded number, only the
	// terms with a shift of 0 are kept
	SortFieldAuto SortFieldType = iota
	// SortFieldAsString forces sort as string (no prefix coded terms removed)
	SortFieldAsString
	// SortFieldAsNumber forces sort as number (only valid prefix coded terms
	// with a shift of 0 are kept)
	SortFieldAsNumber
	// SortFieldAsDate forces sort as date (only valid prefix coded terms
	// with a shift of 0 are kept)
	SortFieldAsDate
)
// SortFieldMode describes the behavior if the field has multiple values
type SortFieldMode int

const (
	// SortFieldDefault uses the first (or only) value, this is the default zero-value
	SortFieldDefault SortFieldMode = iota // FIXME name is confusing
	// SortFieldMin uses the minimum value (byte-wise comparison of the terms)
	SortFieldMin
	// SortFieldMax uses the maximum value (byte-wise comparison of the terms)
	SortFieldMax
)
// SortFieldMissing controls where documents missing a field value should be sorted
type SortFieldMissing int

const (
	// SortFieldMissingLast sorts documents missing a field at the end;
	// this is the default zero-value
	SortFieldMissingLast SortFieldMissing = iota
	// SortFieldMissingFirst sorts documents missing a field at the beginning
	SortFieldMissingFirst
)
// SortField will sort results by the value of a stored field
//
//	Field is the name of the field
//	Desc reverses the sort order (default false)
//	Type allows forcing of string/number/date behavior (default auto)
//	Mode controls behavior for multi-valued fields (default first)
//	Missing controls behavior of missing values (default last)
type SortField struct {
	Field   string
	Desc    bool
	Type    SortFieldType
	Mode    SortFieldMode
	Missing SortFieldMissing
	// values accumulates the terms reported through UpdateVisitor for the
	// current document; Value truncates it to length zero.
	values [][]byte
	// tmp is a scratch slice reused by filterTermsByType.
	tmp [][]byte
}
// UpdateVisitor records term for the current document when field is the
// field this SortField sorts on; terms of other fields are ignored.
func (s *SortField) UpdateVisitor(field string, term []byte) {
	if field != s.Field {
		return
	}
	s.values = append(s.values, term)
}
// Value returns the sort value for the current document, derived from the
// terms collected by UpdateVisitor (filtered by Type, then reduced by Mode).
// It also clears the collected terms so the SortField is ready for the next
// document.
func (s *SortField) Value(i *DocumentMatch) string {
	chosen := s.filterTermsByMode(s.filterTermsByType(s.values))
	s.values = s.values[:0]
	return chosen
}
// DecodeValue turns a raw sort value into a readable string. For
// SortFieldAsNumber the prefix coded value is rendered as a decimal float;
// for SortFieldAsDate it is interpreted as nanoseconds since the Unix epoch
// and rendered as RFC3339Nano in UTC. For every other type, or when the
// value cannot be decoded, the input is returned unchanged.
func (s *SortField) DecodeValue(value string) string {
	if s.Type != SortFieldAsNumber && s.Type != SortFieldAsDate {
		return value
	}
	i64, err := numeric.PrefixCoded(value).Int64()
	if err != nil {
		return value
	}
	if s.Type == SortFieldAsDate {
		return time.Unix(0, i64).UTC().Format(time.RFC3339Nano)
	}
	return strconv.FormatFloat(numeric.Int64ToFloat64(i64), 'f', -1, 64)
}
// Descending reports whether this sort is in descending order (the Desc
// field).
func (s *SortField) Descending() bool {
	return s.Desc
}
// filterTermsByMode reduces the candidate terms to the single sort value.
//
// With one term, or several terms in SortFieldDefault mode, the first term is
// used. In SortFieldMin / SortFieldMax mode the terms are sorted in place
// (byte-wise) and the first / last one is used. When no term applies, a
// sentinel is returned so that the document lands where Missing asks for,
// taking the sort direction into account.
func (s *SortField) filterTermsByMode(terms [][]byte) string {
	switch n := len(terms); {
	case n == 1, n > 1 && s.Mode == SortFieldDefault:
		return string(terms[0])
	case n > 1 && s.Mode == SortFieldMin:
		sort.Sort(BytesSlice(terms))
		return string(terms[0])
	case n > 1 && s.Mode == SortFieldMax:
		sort.Sort(BytesSlice(terms))
		return string(terms[n-1])
	}

	// handle missing terms: LowTerm is used exactly when "missing last" and
	// "descending" agree, HighTerm otherwise
	missingLast := s.Missing == SortFieldMissingLast
	if missingLast == s.Desc {
		return LowTerm
	}
	return HighTerm
}
// filterTermsByType makes a single pass over terms and applies the filter
// implied by s.Type:
//
//   - SortFieldAuto: if every term is a valid prefix coded number and at
//     least one of them has a shift of 0, only the shift-0 terms are
//     returned; otherwise terms is returned untouched.
//   - SortFieldAsNumber / SortFieldAsDate: only valid prefix coded terms
//     with a shift of 0 are returned (possibly none).
//   - any other type: terms is returned untouched.
//
// The filtered result is built in the s.tmp scratch slice.
func (s *SortField) filterTermsByType(terms [][]byte) [][]byte {
	switch s.Type {
	case SortFieldAuto, SortFieldAsNumber, SortFieldAsDate:
		// handled below
	default:
		return terms
	}

	shiftZero := s.tmp[:0]
	sawInvalid := false
	for _, term := range terms {
		valid, shift := numeric.ValidPrefixCodedTermBytes(term)
		switch {
		case !valid:
			sawInvalid = true
		case shift == 0:
			shiftZero = append(shiftZero, term)
		}
	}

	// in auto mode the terms are only replaced when all of them were prefix
	// coded and some zero-shift term was found
	if s.Type == SortFieldAuto && (sawInvalid || len(shiftZero) == 0) {
		return terms
	}
	s.tmp = shiftZero[:0]
	return shiftZero
}
// RequiresDocID reports that a SortField does not need the DocID loaded.
func (s *SortField) RequiresDocID() bool {
	return false
}
// RequiresScoring reports that a SortField does not need scoring.
func (s *SortField) RequiresScoring() bool {
	return false
}
// RequiresFields returns the single field this SortField sorts on.
func (s *SortField) RequiresFields() []string {
	return []string{s.Field}
}
// MarshalJSON encodes the SortField. When Missing, Mode and Type all hold
// their zero values the compact string form is used (the field name, with a
// "-" prefix when descending). Otherwise an object with "by": "field" is
// emitted, carrying only the non-default settings.
func (s *SortField) MarshalJSON() ([]byte, error) {
	simple := s.Missing == SortFieldMissingLast &&
		s.Mode == SortFieldDefault &&
		s.Type == SortFieldAuto
	if simple {
		name := s.Field
		if s.Desc {
			name = "-" + s.Field
		}
		return json.Marshal(name)
	}

	obj := map[string]interface{}{
		"by":    "field",
		"field": s.Field,
	}
	if s.Desc {
		obj["desc"] = true
	}
	if s.Missing == SortFieldMissingFirst {
		obj["missing"] = "first"
	}
	switch s.Mode {
	case SortFieldMin:
		obj["mode"] = "min"
	case SortFieldMax:
		obj["mode"] = "max"
	}
	switch s.Type {
	case SortFieldAsString:
		obj["type"] = "string"
	case SortFieldAsNumber:
		obj["type"] = "number"
	case SortFieldAsDate:
		obj["type"] = "date"
	}
	return json.Marshal(obj)
}
// Copy returns an independent copy of this SortField.
//
// The exported settings are copied by value. The per-document working state
// (values and tmp) is given fresh backing arrays: a plain struct copy would
// leave both SortFields appending into the same arrays, so using the copy and
// the original side by side could overwrite each other's collected terms.
// The term byte slices themselves are shared; the sort code only reads them.
func (s *SortField) Copy() SearchSort {
	rv := *s
	if s.values != nil {
		rv.values = make([][]byte, len(s.values))
		copy(rv.values, s.values)
	}
	if s.tmp != nil {
		rv.tmp = make([][]byte, len(s.tmp))
		copy(rv.tmp, s.tmp)
	}
	return &rv
}
// Reverse inverts this sort in place: the direction is flipped and the
// placement of documents missing the field is swapped between first and last.
func (s *SortField) Reverse() {
	s.Desc = !s.Desc
	switch s.Missing {
	case SortFieldMissingFirst:
		s.Missing = SortFieldMissingLast
	default:
		s.Missing = SortFieldMissingFirst
	}
}
// SortDocID will sort results by the document identifier
type SortDocID struct {
	Desc bool
}

// UpdateVisitor does nothing: the sort value of a SortDocID does not depend
// on any field terms.
func (s *SortDocID) UpdateVisitor(field string, term []byte) {}

// Value returns the document identifier of the match as its sort value.
func (s *SortDocID) Value(i *DocumentMatch) string {
	return i.ID
}

// DecodeValue returns value unchanged.
func (s *SortDocID) DecodeValue(value string) string {
	return value
}

// Descending reports whether this sort is in descending order.
func (s *SortDocID) Descending() bool {
	return s.Desc
}

// RequiresDocID reports that a SortDocID needs the DocID loaded.
func (s *SortDocID) RequiresDocID() bool {
	return true
}

// RequiresScoring reports that a SortDocID does not need scoring.
func (s *SortDocID) RequiresScoring() bool {
	return false
}

// RequiresFields reports that a SortDocID needs no fields.
func (s *SortDocID) RequiresFields() []string {
	return nil
}

// MarshalJSON encodes the sort in its compact string form: "_id", or "-_id"
// when descending.
func (s *SortDocID) MarshalJSON() ([]byte, error) {
	repr := "_id"
	if s.Desc {
		repr = "-_id"
	}
	return json.Marshal(repr)
}

// Copy returns a new SortDocID with the same direction.
func (s *SortDocID) Copy() SearchSort {
	return &SortDocID{Desc: s.Desc}
}

// Reverse flips the sort direction in place.
func (s *SortDocID) Reverse() {
	s.Desc = !s.Desc
}
// SortScore will sort results by the document match score
type SortScore struct {
	Desc bool
}

// UpdateVisitor does nothing: the sort value of a SortScore does not depend
// on any field terms.
func (s *SortScore) UpdateVisitor(field string, term []byte) {}

// Value returns the fixed placeholder "_score"; SortOrder.Compare compares
// the Score fields directly for criteria that require scoring.
func (s *SortScore) Value(i *DocumentMatch) string {
	return "_score"
}

// DecodeValue returns value unchanged.
func (s *SortScore) DecodeValue(value string) string {
	return value
}

// Descending reports whether this sort is in descending order.
func (s *SortScore) Descending() bool {
	return s.Desc
}

// RequiresDocID reports that a SortScore does not need the DocID loaded.
func (s *SortScore) RequiresDocID() bool {
	return false
}

// RequiresScoring reports that a SortScore needs scoring.
func (s *SortScore) RequiresScoring() bool {
	return true
}

// RequiresFields reports that a SortScore needs no fields.
func (s *SortScore) RequiresFields() []string {
	return nil
}

// MarshalJSON encodes the sort in its compact string form: "_score", or
// "-_score" when descending.
func (s *SortScore) MarshalJSON() ([]byte, error) {
	repr := "_score"
	if s.Desc {
		repr = "-_score"
	}
	return json.Marshal(repr)
}

// Copy returns a new SortScore with the same direction.
func (s *SortScore) Copy() SearchSort {
	return &SortScore{Desc: s.Desc}
}

// Reverse flips the sort direction in place.
func (s *SortScore) Reverse() {
	s.Desc = !s.Desc
}
// maxDistance is the prefix coded form (shift 0) of math.MaxInt64. It is the
// sort value SortGeoDistance.Value returns when a document has no usable
// term for the field or the term cannot be decoded.
var maxDistance = string(numeric.MustNewPrefixCodedInt64(math.MaxInt64, 0))
// NewSortGeoDistance creates a SearchSort instance for sorting documents by
// their distance from the point (lon, lat). unit is parsed with
// geo.ParseDistanceUnit; if it cannot be parsed, nil and the parse error are
// returned.
func NewSortGeoDistance(field, unit string, lon, lat float64, desc bool) (
	*SortGeoDistance, error,
) {
	mult, err := geo.ParseDistanceUnit(unit)
	if err != nil {
		return nil, err
	}
	return &SortGeoDistance{
		Field:    field,
		Desc:     desc,
		Unit:     unit,
		Lon:      lon,
		Lat:      lat,
		unitMult: mult,
	}, nil
}
// SortGeoDistance will sort results by the distance of an
// indexed geo point, from the provided location.
//
//	Field is the name of the field
//	Desc reverses the sort order (default false)
//	Unit is the distance unit name, as given to NewSortGeoDistance
//	Lon, Lat is the location distances are measured from
type SortGeoDistance struct {
	Field string
	Desc  bool
	Unit  string
	// values accumulates the terms reported through UpdateVisitor for the
	// current document; Value truncates it to length zero.
	values [][]byte
	Lon    float64
	Lat    float64
	// unitMult is the multiplier parsed from Unit by NewSortGeoDistance;
	// Value divides the distance in meters by it when it is non-zero.
	unitMult float64
	// tmp is a buffer reused by Value for the prefix coded distance.
	tmp []byte
}
// UpdateVisitor records term for the current document when field is the
// field this SortGeoDistance sorts on; terms of other fields are ignored.
func (s *SortGeoDistance) UpdateVisitor(field string, term []byte) {
	if field != s.Field {
		return
	}
	s.values = append(s.values, term)
}
// Value returns the sort value for the current document: the prefix coded
// distance between the document's point and (s.Lon, s.Lat). It also clears
// the collected terms so the SortGeoDistance is ready for the next document.
// maxDistance is returned when the document has no zero-shift prefix coded
// term or the term cannot be decoded.
func (s *SortGeoDistance) Value(i *DocumentMatch) string {
	term := s.findPrefixCodedNumericTerm(s.values)
	s.values = s.values[:0]
	if term == nil {
		return maxDistance
	}
	hash, err := numeric.PrefixCoded(term).Int64()
	if err != nil {
		return maxDistance
	}

	morton := uint64(hash)
	docLon := geo.MortonUnhashLon(morton)
	docLat := geo.MortonUnhashLat(morton)

	// Haversin yields km; convert to m before applying the unit multiplier
	meters := geo.Haversin(s.Lon, s.Lat, docLon, docLat) * 1000
	if s.unitMult != 0 {
		meters /= s.unitMult
	}

	s.tmp = numeric.MustNewPrefixCodedInt64Prealloc(numeric.Float64ToInt64(meters), 0, s.tmp)
	return string(s.tmp)
}
// DecodeValue renders a prefix coded distance as a decimal float string. The
// empty string is returned when value cannot be decoded.
func (s *SortGeoDistance) DecodeValue(value string) string {
	encoded, err := numeric.PrefixCoded(value).Int64()
	if err != nil {
		return ""
	}
	distance := numeric.Int64ToFloat64(encoded)
	return strconv.FormatFloat(distance, 'f', -1, 64)
}
// Descending reports whether this sort is in descending order (the Desc
// field).
func (s *SortGeoDistance) Descending() bool {
	return s.Desc
}
// findPrefixCodedNumericTerm scans terms in order and returns the first one
// that is a valid prefix coded numeric term with a shift of 0, or nil when
// there is none.
func (s *SortGeoDistance) findPrefixCodedNumericTerm(terms [][]byte) []byte {
	for idx := range terms {
		valid, shift := numeric.ValidPrefixCodedTermBytes(terms[idx])
		if !valid || shift != 0 {
			continue
		}
		return terms[idx]
	}
	return nil
}
// RequiresDocID reports that a SortGeoDistance does not need the DocID
// loaded.
func (s *SortGeoDistance) RequiresDocID() bool {
	return false
}
// RequiresScoring reports that a SortGeoDistance does not need scoring.
func (s *SortGeoDistance) RequiresScoring() bool {
	return false
}
// RequiresFields returns the single field this SortGeoDistance sorts on.
func (s *SortGeoDistance) RequiresFields() []string {
	return []string{s.Field}
}
// MarshalJSON encodes the sort as an object with "by": "geo_distance", the
// field name and the reference location. "unit" is included only when Unit
// is non-empty and "desc" only when the sort is descending.
func (s *SortGeoDistance) MarshalJSON() ([]byte, error) {
	location := map[string]interface{}{
		"lon": s.Lon,
		"lat": s.Lat,
	}
	obj := map[string]interface{}{
		"by":       "geo_distance",
		"field":    s.Field,
		"location": location,
	}
	if s.Desc {
		obj["desc"] = true
	}
	if s.Unit != "" {
		obj["unit"] = s.Unit
	}
	return json.Marshal(obj)
}
// Copy returns an independent copy of this SortGeoDistance.
//
// The settings are copied by value. The per-document working state (values
// and the tmp encoding buffer) is given fresh backing arrays: a plain struct
// copy would leave both instances writing into the same arrays, so using the
// copy and the original side by side could corrupt each other's state.
// The term byte slices themselves are shared; the sort code only reads them.
func (s *SortGeoDistance) Copy() SearchSort {
	rv := *s
	if s.values != nil {
		rv.values = make([][]byte, len(s.values))
		copy(rv.values, s.values)
	}
	if s.tmp != nil {
		rv.tmp = make([]byte, len(s.tmp))
		copy(rv.tmp, s.tmp)
	}
	return &rv
}
// Reverse flips the sort direction in place.
func (s *SortGeoDistance) Reverse() {
	s.Desc = !s.Desc
}
// BytesSlice is a slice of byte slices with Len, Less and Swap methods, so it
// can be passed to sort.Sort; elements are ordered byte-wise.
type BytesSlice [][]byte

// Len returns the number of elements.
func (p BytesSlice) Len() int {
	return len(p)
}

// Less reports whether element i sorts strictly before element j.
func (p BytesSlice) Less(i, j int) bool {
	return bytes.Compare(p[j], p[i]) > 0
}

// Swap exchanges elements i and j.
func (p BytesSlice) Swap(i, j int) {
	held := p[i]
	p[i] = p[j]
	p[j] = held
}
================================================
FILE: search/sort_test.go
================================================
package search
import (
"reflect"
"testing"
)
// TestParseSearchSortObj is a table-driven test of ParseSearchSortObj. Each
// case supplies the decoded object form of a sort and either the SearchSort
// expected back (compared with reflect.DeepEqual) or the expectation that an
// error is returned.
func TestParseSearchSortObj(t *testing.T) {
	tests := []struct {
		name    string
		input   map[string]interface{}
		want    SearchSort
		wantErr bool
	}{
		{
			name: "sort by id",
			input: map[string]interface{}{
				"by":   "id",
				"desc": false,
			},
			want: &SortDocID{
				Desc: false,
			},
			wantErr: false,
		},
		{
			name: "sort by id descending",
			input: map[string]interface{}{
				"by":   "id",
				"desc": true,
			},
			want: &SortDocID{
				Desc: true,
			},
			wantErr: false,
		},
		{
			name: "sort by score",
			input: map[string]interface{}{
				"by":   "score",
				"desc": false,
			},
			want: &SortScore{
				Desc: false,
			},
			wantErr: false,
		},
		{
			name: "sort by score descending",
			input: map[string]interface{}{
				"by":   "score",
				"desc": true,
			},
			want: &SortScore{
				Desc: true,
			},
			wantErr: false,
		},
		{
			name: "sort by geo_distance",
			input: map[string]interface{}{
				"by":    "geo_distance",
				"field": "location",
				"location": map[string]interface{}{
					"lon": 1.0,
					"lat": 2.0,
				},
				"unit": "km",
				"desc": false,
			},
			// unitMult is the multiplier the "km" unit is expected to parse to
			want: &SortGeoDistance{
				Field:    "location",
				Desc:     false,
				Lon:      1.0,
				Lat:      2.0,
				Unit:     "km",
				unitMult: 1000.0,
			},
			wantErr: false,
		},
		{
			name: "sort by field",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"desc":    false,
				"type":    "auto",
				"mode":    "default",
				"missing": "last",
			},
			want: &SortField{
				Field:   "name",
				Desc:    false,
				Type:    SortFieldAuto,
				Mode:    SortFieldDefault,
				Missing: SortFieldMissingLast,
			},
			wantErr: false,
		},
		{
			name: "sort by field with missing",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"desc":    false,
				"type":    "auto",
				"mode":    "default",
				"missing": "first",
			},
			want: &SortField{
				Field:   "name",
				Desc:    false,
				Type:    SortFieldAuto,
				Mode:    SortFieldDefault,
				Missing: SortFieldMissingFirst,
			},
			wantErr: false,
		},
		{
			name: "sort by field descending",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"desc":    true,
				"type":    "string",
				"mode":    "min",
				"missing": "first",
			},
			want: &SortField{
				Field:   "name",
				Desc:    true,
				Type:    SortFieldAsString,
				Mode:    SortFieldMin,
				Missing: SortFieldMissingFirst,
			},
			wantErr: false,
		},
		{
			name: "missing by",
			input: map[string]interface{}{
				"desc": true,
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "unknown by",
			input: map[string]interface{}{
				"by": "unknown",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "missing field for geo_distance",
			input: map[string]interface{}{
				"by": "geo_distance",
				"location": map[string]interface{}{
					"lon": 1.0,
					"lat": 2.0,
				},
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "missing location for geo_distance",
			input: map[string]interface{}{
				"by":    "geo_distance",
				"field": "location",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "invalid unit for geo_distance",
			input: map[string]interface{}{
				"by":    "geo_distance",
				"field": "location",
				"location": map[string]interface{}{
					"lon": 1.0,
					"lat": 2.0,
				},
				"unit": "invalid",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "missing field for field sort",
			input: map[string]interface{}{
				"by": "field",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "unknown type for field sort",
			input: map[string]interface{}{
				"by":    "field",
				"field": "name",
				"type":  "unknown",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "number type for field sort with desc",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"type":    "number",
				"mode":    "default",
				"desc":    true,
				"missing": "last",
			},
			want: &SortField{
				Field:   "name",
				Desc:    true,
				Type:    SortFieldAsNumber,
				Mode:    SortFieldDefault,
				Missing: SortFieldMissingLast,
			},
			wantErr: false,
		},
		{
			name: "date type for field sort with desc",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"type":    "date",
				"mode":    "default",
				"desc":    true,
				"missing": "last",
			},
			want: &SortField{
				Field:   "name",
				Desc:    true,
				Type:    SortFieldAsDate,
				Mode:    SortFieldDefault,
				Missing: SortFieldMissingLast,
			},
			wantErr: false,
		},
		{
			name: "unknown type for field sort with missing",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"type":    "unknown",
				"mode":    "default",
				"missing": "last",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "unknown mode for field sort",
			input: map[string]interface{}{
				"by":    "field",
				"field": "name",
				"mode":  "unknown",
			},
			want:    nil,
			wantErr: true,
		},
		{
			name: "default mode for field sort",
			input: map[string]interface{}{
				"by":    "field",
				"field": "name",
				"mode":  "default",
			},
			want: &SortField{
				Field:   "name",
				Desc:    false,
				Type:    SortFieldAuto,
				Mode:    SortFieldDefault,
				Missing: SortFieldMissingLast,
			},
			wantErr: false,
		},
		{
			name: "max mode for field sort",
			input: map[string]interface{}{
				"by":    "field",
				"field": "name",
				"mode":  "max",
			},
			want: &SortField{
				Field:   "name",
				Desc:    false,
				Type:    SortFieldAuto,
				Mode:    SortFieldMax,
				Missing: SortFieldMissingLast,
			},
			wantErr: false,
		},
		{
			name: "min mode for field sort",
			input: map[string]interface{}{
				"by":    "field",
				"field": "name",
				"mode":  "min",
			},
			want: &SortField{
				Field:   "name",
				Desc:    false,
				Type:    SortFieldAuto,
				Mode:    SortFieldMin,
				Missing: SortFieldMissingLast,
			},
			wantErr: false,
		},
		{
			name: "unknown missing for field sort",
			input: map[string]interface{}{
				"by":      "field",
				"field":   "name",
				"missing": "unknown",
			},
			want:    nil,
			wantErr: true,
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			got, err := ParseSearchSortObj(tt.input)
			// the presence of an error must match the expectation exactly
			if (err != nil) != tt.wantErr {
				t.Errorf("ParseSearchSortObj() error = %v, wantErr %v", err, tt.wantErr)
				return
			}
			if !reflect.DeepEqual(got, tt.want) {
				t.Errorf("ParseSearchSortObj() = %v, want %v", got, tt.want)
			}
		})
	}
}
================================================
FILE: search/util.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"context"
"github.com/blevesearch/geo/s2"
)
// MergeLocations merges all the given location maps into one and returns it.
//
// The merge is performed in place into locations[0], which is also the
// returned map; the remaining maps are only read. When the same field occurs
// in more than one map, their term maps are combined with
// MergeTermLocationMaps.
//
// An empty input yields nil. If locations[0] is nil, a map is allocated the
// first time there is something to store, so nil is still returned when no
// map holds any field.
func MergeLocations(locations []FieldTermLocationMap) FieldTermLocationMap {
	if len(locations) == 0 {
		return nil
	}
	rv := locations[0]
	for i := 1; i < len(locations); i++ {
		nextLocations := locations[i]
		for field, termLocationMap := range nextLocations {
			if rv == nil {
				// allocate lazily: writing to a nil map would panic
				rv = make(FieldTermLocationMap, len(nextLocations))
			}
			if rvTermLocationMap, rvHasField := rv[field]; rvHasField {
				rv[field] = MergeTermLocationMaps(rvTermLocationMap, termLocationMap)
			} else {
				rv[field] = termLocationMap
			}
		}
	}
	return rv
}
// MergeTermLocationMaps copies every entry of other into rv and returns rv.
// Entries already present in rv under the same term are overwritten.
//
// If rv is nil and other has entries, a new map is allocated and returned
// (writing to a nil map would panic); callers must therefore use the
// returned map.
func MergeTermLocationMaps(rv, other TermLocationMap) TermLocationMap {
	if rv == nil && len(other) > 0 {
		rv = make(TermLocationMap, len(other))
	}
	for term, locationMap := range other {
		// for a given term/document there cannot be different locations
		// if they came back from different clauses, overwrite is ok
		rv[term] = locationMap
	}
	return rv
}
// MergeFieldTermLocations appends copies of the field term locations of every
// non-nil match to dest and returns the resulting slice. The total size is
// computed up front so that dest is grown at most once.
func MergeFieldTermLocations(dest []FieldTermLocation, matches []*DocumentMatch) []FieldTermLocation {
	total := len(dest)
	for _, dm := range matches {
		if dm == nil {
			continue
		}
		total += len(dm.FieldTermLocations)
	}

	if total > cap(dest) {
		grown := make([]FieldTermLocation, len(dest), total)
		copy(grown, dest)
		dest = grown
	}

	for _, dm := range matches {
		if dm == nil {
			continue
		}
		dest = mergeFieldTermLocationFromMatch(dest, dm)
	}
	return dest
}
// MergeFieldTermLocationsFromMatch appends copies of the field term locations
// of a single DocumentMatch to dest and returns the updated slice. A nil
// match leaves dest unchanged.
func MergeFieldTermLocationsFromMatch(dest []FieldTermLocation, match *DocumentMatch) []FieldTermLocation {
	if match == nil {
		return dest
	}
	if need := len(dest) + len(match.FieldTermLocations); need > cap(dest) {
		grown := make([]FieldTermLocation, len(dest), need)
		copy(grown, dest)
		dest = grown
	}
	return mergeFieldTermLocationFromMatch(dest, match)
}
// mergeFieldTermLocationFromMatch appends a copy of each field term location
// of dm to dest. The ArrayPositions of every location are duplicated so the
// appended entries do not share them with dm.
// Callers are expected to have reserved enough capacity in dest.
func mergeFieldTermLocationFromMatch(dest []FieldTermLocation, dm *DocumentMatch) []FieldTermLocation {
	for idx := range dm.FieldTermLocations {
		src := &dm.FieldTermLocations[idx]
		loc := Location{
			Pos:            src.Location.Pos,
			Start:          src.Location.Start,
			End:            src.Location.End,
			ArrayPositions: append(ArrayPositions(nil), src.Location.ArrayPositions...),
		}
		dest = append(dest, FieldTermLocation{
			Field:    src.Field,
			Term:     src.Term,
			Location: loc,
		})
	}
	return dest
}
type (
	// SearchIncrementalCostCallbackMsg is the message kind passed to a
	// SearchIncrementalCostCallbackFn (AddM, AbortM or DoneM).
	SearchIncrementalCostCallbackMsg uint
	// SearchQueryType classifies the query a search cost is attributed to.
	SearchQueryType uint
)

// Query types, each a distinct bit (1, 2, 4, 8). RecordSearchCost falls back
// to GenericCost when the context carries no query type.
const (
	Term = SearchQueryType(1 << iota)
	Geo
	Numeric
	GenericCost
)

// Cost callback messages, each a distinct bit (1, 2, 4).
const (
	AddM = SearchIncrementalCostCallbackMsg(1 << iota)
	AbortM
	DoneM
)
// ContextKey is the string-based type of the keys under which search related
// values are stored in a context.Context.
type ContextKey string

// String returns the key as a plain string.
func (c ContextKey) String() string {
	plain := string(c)
	return plain
}
const (
	// SearchIncrementalCostKey is the key RecordSearchCost uses to look up
	// the SearchIncrementalCostCallbackFn in the context.
	SearchIncrementalCostKey ContextKey = "_search_incremental_cost_key"
	// QueryTypeKey is the key RecordSearchCost uses to look up the
	// SearchQueryType in the context.
	QueryTypeKey             ContextKey = "_query_type_key"
	FuzzyMatchPhraseKey      ContextKey = "_fuzzy_match_phrase_key"
	IncludeScoreBreakdownKey ContextKey = "_include_score_breakdown_key"

	// PreSearchKey indicates whether to perform a preliminary search to gather necessary
	// information which would be used in the actual search down the line.
	PreSearchKey ContextKey = "_presearch_key"

	// GetScoringModelCallbackKey is used to help the underlying searcher identify
	// which scoring mechanism to use based on index mapping.
	GetScoringModelCallbackKey ContextKey = "_get_scoring_model"

	// SearchIOStatsCallbackKey is used to help the underlying searcher identify
	// the callback for search IO stats.
	// NOTE(review): presumably the value is a SearchIOStatsCallbackFunc — confirm.
	SearchIOStatsCallbackKey ContextKey = "_search_io_stats_callback_key"

	// GeoBufferPoolCallbackKey is used to help the underlying searcher
	// identify the callback for the geo buffer pool.
	// NOTE(review): presumably the value is a GeoBufferPoolCallbackFunc — confirm.
	GeoBufferPoolCallbackKey ContextKey = "_geo_buffer_pool_callback_key"

	// SearchTypeKey is used to identify type of the search being performed.
	//
	// for consistent scoring in cases an index is partitioned/sharded (using an
	// index alias), GlobalScoring helps in aggregating the necessary stats across
	// all the child bleve indexes (shards/partitions) first before the actual search
	// is performed, such that the scoring involved using these stats would be at a
	// global level.
	SearchTypeKey ContextKey = "_search_type_key"

	// The following keys are used to invoke the callbacks at the start and end stages
	// of optimizing the disjunction/conjunction searcher creation.
	SearcherStartCallbackKey ContextKey = "_searcher_start_callback_key"
	SearcherEndCallbackKey   ContextKey = "_searcher_end_callback_key"

	// FieldTermSynonymMapKey is used to store and transport the synonym definitions data
	// to the actual search phase which would use the synonyms to perform the search.
	FieldTermSynonymMapKey ContextKey = "_field_term_synonym_map_key"

	// BM25StatsKey is used to store and transport the BM25 Data
	// to the actual search phase which would use it to perform the search.
	BM25StatsKey ContextKey = "_bm25_stats_key"

	// ScoreFusionKey is used to communicate whether KNN hits need to be preserved for
	// hybrid search algorithms (like RRF)
	ScoreFusionKey ContextKey = "_fusion_rescoring_key"

	// NestedSearchKey is used to communicate whether the search is performed
	// in an index with nested documents
	NestedSearchKey ContextKey = "_nested_search_key"
)
// RecordSearchCost reports a search cost of the given size to the cost
// callback stored in ctx under SearchIncrementalCostKey, if there is one.
// The query type is read from ctx under QueryTypeKey and defaults to
// GenericCost. A nil ctx makes this a no-op.
func RecordSearchCost(ctx context.Context,
	msg SearchIncrementalCostCallbackMsg, bytes uint64,
) {
	if ctx == nil {
		return
	}
	queryType, ok := ctx.Value(QueryTypeKey).(SearchQueryType)
	if !ok {
		// for the cost of the non query type specific factors such as
		// doc values and stored fields section.
		queryType = GenericCost
	}
	if callback := ctx.Value(SearchIncrementalCostKey); callback != nil {
		callback.(SearchIncrementalCostCallbackFn)(msg, queryType, bytes)
	}
}
// Sizes of the largest (24KB) and the smallest (24 bytes) buffer in the geo
// buffer pool. The pools are used to read a sequence of vertices which are
// always 24 bytes each.
const (
	MaxGeoBufPoolSize = 24 * 1024
	MinGeoBufPoolSize = 24
)
// The pre-search data keys are used to store the data gathered during the
// presearch phase, which is then used in the actual search phase.
const (
	KnnPreSearchDataKey     = "_knn_pre_search_data_key"
	SynonymPreSearchDataKey = "_synonym_pre_search_data_key"
	BM25PreSearchDataKey    = "_bm25_pre_search_data_key"
)

// GlobalScoring names the search type that aggregates scoring stats across
// all child indexes before the actual search (see SearchTypeKey).
const GlobalScoring = "_global_scoring"
type (
	// SearcherStartCallbackFn is a callback function type used to signal the start of
	// searcher creation phase.
	SearcherStartCallbackFn func(size uint64) error
	// SearcherEndCallbackFn is a callback function type used to signal the end of
	// a searcher creation phase.
	SearcherEndCallbackFn func(size uint64) error
	// GetScoringModelCallbackFn is a callback function type used to get the scoring model
	// to be used for scoring documents during search.
	GetScoringModelCallbackFn func() string
	// HybridMergeCallbackFn is a callback function type used to merge a KNN document match
	// into a full text search document match, of the same docID as part of hybrid search.
	HybridMergeCallbackFn func(ftsMatch *DocumentMatch, knnMatch *DocumentMatch)
	// DescendantAdderCallbackFn is a callback function type used to customize how a descendant
	// DocumentMatch is merged into its parent. This allows different descendant addition strategies for
	// different use cases (e.g., TopN vs KNN collection).
	DescendantAdderCallbackFn func(parent *DocumentMatch, descendant *DocumentMatch) error
	// GeoBufferPoolCallbackFunc is a callback function type used to get the geo buffer pool
	// to be used during geo searches.
	GeoBufferPoolCallbackFunc func() *s2.GeoBufferPool
	// SearchIOStatsCallbackFunc is a callback function type used to report search IO stats
	// during search.
	SearchIOStatsCallbackFunc func(uint64)
	// SearchIncrementalCostCallbackFn receives incremental search cost reports.
	// Implementations should handle the following messages
	//   - add: increment the cost of a search operation
	//     (which can be specific to a query type as well)
	//   - abort: query was aborted due to a cancel of search's context (for eg),
	//     which can be handled differently as well
	//   - done: indicates that a search was complete and the tracked cost can be
	//     handled safely by the implementation.
	SearchIncrementalCostCallbackFn func(SearchIncrementalCostCallbackMsg,
		SearchQueryType, uint64)
)
// FieldTermSynonymMap maps field -> term -> synonyms.
type FieldTermSynonymMap map[string]map[string][]string

// MergeWith merges fts into f in place. Fields missing from f are created,
// and for every term the synonyms from fts are appended after those f
// already holds (no de-duplication is performed).
func (f FieldTermSynonymMap) MergeWith(fts FieldTermSynonymMap) {
	for field, incoming := range fts {
		// make sure the receiver has an entry for this field
		existing, found := f[field]
		if !found {
			existing = make(map[string][]string)
			f[field] = existing
		}
		for term, synonyms := range incoming {
			existing[term] = append(existing[term], synonyms...)
		}
	}
}
// BM25 specific multipliers which control the scoring of a document.
//
// BM25_b - controls the extent to which a doc's field length normalizes the term frequency part of the score
// BM25_k1 - controls the saturation of the score due to term frequency
// the default values are as per elastic search's implementation
//   - https://www.elastic.co/guide/en/elasticsearch/reference/current/index-modules-similarity.html#bm25
//   - https://www.elastic.co/blog/practical-bm25-part-3-considerations-for-picking-b-and-k1-in-elasticsearch
var (
	BM25_k1 float64 = 1.2
	BM25_b  float64 = 0.75
)
// BM25Stats holds the statistics transported for BM25 scoring: a document
// count and a per-field cardinality, keyed by field name.
// NOTE(review): the exact meaning of "cardinality" here is not visible in
// this file — confirm against the code that fills it in.
type BM25Stats struct {
	DocCount         float64        `json:"doc_count"`
	FieldCardinality map[string]int `json:"field_cardinality"`
}
// FieldSet represents a set of queried fields, keyed by field name.
type FieldSet map[string]struct{}

// NewFieldSet creates a new, empty FieldSet.
func NewFieldSet() FieldSet {
	return FieldSet{}
}

// AddField adds a field to the set.
func (fs FieldSet) AddField(field string) {
	fs[field] = struct{}{}
}

// HasID returns true if the field set contains the "_id" field.
func (fs FieldSet) HasID() bool {
	_, present := fs["_id"]
	return present
}

// HasAll returns true if the field set contains the "_all" field.
func (fs FieldSet) HasAll() bool {
	_, present := fs["_all"]
	return present
}
================================================
FILE: search/util_test.go
================================================
// Copyright (c) 2013 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package search
import (
"reflect"
"testing"
)
// TestMergeLocations merges three location maps: two that share the outer
// key "marty" with different inner keys, and one with the distinct outer key
// "josh". The result must contain both inner entries under "marty" plus the
// "josh" entry.
func TestMergeLocations(t *testing.T) {
	flm1 := FieldTermLocationMap{
		"marty": TermLocationMap{
			"name": {
				&Location{
					Pos:   1,
					Start: 0,
					End:   5,
				},
			},
		},
	}

	flm2 := FieldTermLocationMap{
		"marty": TermLocationMap{
			"description": {
				&Location{
					Pos:   5,
					Start: 20,
					End:   25,
				},
			},
		},
	}

	flm3 := FieldTermLocationMap{
		"josh": TermLocationMap{
			"description": {
				&Location{
					Pos:   5,
					Start: 20,
					End:   25,
				},
			},
		},
	}

	expectedMerge := FieldTermLocationMap{
		"marty": TermLocationMap{
			"description": {
				&Location{
					Pos:   5,
					Start: 20,
					End:   25,
				},
			},
			"name": {
				&Location{
					Pos:   1,
					Start: 0,
					End:   5,
				},
			},
		},
		"josh": TermLocationMap{
			"description": {
				&Location{
					Pos:   5,
					Start: 20,
					End:   25,
				},
			},
		},
	}

	mergedLocations := MergeLocations([]FieldTermLocationMap{flm1, flm2, flm3})
	if !reflect.DeepEqual(expectedMerge, mergedLocations) {
		t.Errorf("expected %v, got %v", expectedMerge, mergedLocations)
	}
}
================================================
FILE: search.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"fmt"
"reflect"
"regexp"
"sort"
"strconv"
"strings"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/datetime/optional"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/registry"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/collector"
"github.com/blevesearch/bleve/v2/search/query"
"github.com/blevesearch/bleve/v2/size"
"github.com/blevesearch/bleve/v2/util"
)
// Static sizes, in bytes, of the SearchResult and SearchStatus types as
// reported by reflect; both are filled in by init.
var (
	reflectStaticSizeSearchResult int
	reflectStaticSizeSearchStatus int
)
// init records the static sizes of SearchResult and SearchStatus, obtained
// through reflection, in the package-level size variables.
func init() {
	reflectStaticSizeSearchResult = int(reflect.TypeOf(new(SearchResult)).Elem().Size())
	reflectStaticSizeSearchStatus = int(reflect.TypeOf(new(SearchStatus)).Elem().Size())
}
// cache is the package-level registry cache. In this file it is used to look
// up named date time parsers while validating facet requests.
var cache = registry.NewCache()
// defaultDateTimeParser names the parser applied to date range strings when
// the range does not name a parser of its own.
const defaultDateTimeParser = optional.Name
// Values for SearchRequest.Score. ScoreRRF and ScoreRSF are the two values
// for which IsScoreFusionRequested reports true.
const (
ScoreDefault = ""
ScoreNone = "none"
ScoreRRF = "rrf"
ScoreRSF = "rsf"
)
// AllowedFusionSort is the only non-nil sort order SearchRequest.Validate
// accepts when score fusion is requested: descending by score.
var AllowedFusionSort = search.SortOrder{&search.SortScore{Desc: true}}
// dateTimeRange is a named date bucket of a facet request. Each bound is held
// either as a time.Time (Start/End) or as an unparsed string
// (startString/endString) that ParseDates resolves with a date time parser.
type dateTimeRange struct {
Name string `json:"name,omitempty"`
Start time.Time `json:"start,omitempty"`
End time.Time `json:"end,omitempty"`
// DateTimeParser optionally names the parser for the string bounds. When it
// is empty, FacetRequest.Validate parses the bounds with the default parser.
DateTimeParser string `json:"datetime_parser,omitempty"`
// Raw string bounds, set by UnmarshalJSON and by the
// AddDateTimeRangeString* helpers.
startString *string
endString *string
}
// ParseDates resolves both bounds of the range. A bound that is already set
// as a time.Time is returned unchanged; otherwise its string form, when
// present, is parsed with dateTimeParser. A bound that has neither form is
// returned as the zero time. Parsing stops at the first bound that fails and
// the error names the offending string and the range.
func (dr *dateTimeRange) ParseDates(dateTimeParser analysis.DateTimeParser) (start, end time.Time, err error) {
	start = dr.Start
	if raw := dr.startString; start.IsZero() && raw != nil {
		parsed, _, parseErr := dateTimeParser.ParseDateTime(*raw)
		if parseErr != nil {
			return start, end, fmt.Errorf("error parsing start date '%s' for date range name '%s': %v", *raw, dr.Name, parseErr)
		}
		start = parsed
	}

	end = dr.End
	if raw := dr.endString; end.IsZero() && raw != nil {
		parsed, _, parseErr := dateTimeParser.ParseDateTime(*raw)
		if parseErr != nil {
			return start, end, fmt.Errorf("error parsing end date '%s' for date range name '%s': %v", *raw, dr.Name, parseErr)
		}
		end = parsed
	}

	return start, end, nil
}
// UnmarshalJSON decodes a date range. The start and end values are read as
// strings and kept unparsed in startString/endString; turning them into
// times is left to ParseDates. Fields absent from the input (and an empty
// parser name) leave the corresponding receiver fields untouched.
func (dr *dateTimeRange) UnmarshalJSON(input []byte) error {
	var decoded struct {
		Name           string  `json:"name,omitempty"`
		Start          *string `json:"start,omitempty"`
		End            *string `json:"end,omitempty"`
		DateTimeParser string  `json:"datetime_parser,omitempty"`
	}
	err := util.UnmarshalJSON(input, &decoded)
	if err != nil {
		return err
	}

	dr.Name = decoded.Name
	if s := decoded.Start; s != nil {
		dr.startString = s
	}
	if e := decoded.End; e != nil {
		dr.endString = e
	}
	if parser := decoded.DateTimeParser; parser != "" {
		dr.DateTimeParser = parser
	}
	return nil
}
// MarshalJSON encodes the range as a JSON object. "name" is always written.
// For each bound the time.Time value is preferred when it is set, falling
// back to the raw string form; a bound with neither form is omitted.
// "datetime_parser" is written only when a parser name is set.
func (dr *dateTimeRange) MarshalJSON() ([]byte, error) {
	out := make(map[string]interface{}, 4)
	out["name"] = dr.Name

	switch {
	case !dr.Start.IsZero():
		out["start"] = dr.Start
	case dr.startString != nil:
		out["start"] = dr.startString
	}

	switch {
	case !dr.End.IsZero():
		out["end"] = dr.End
	case dr.endString != nil:
		out["end"] = dr.endString
	}

	if dr.DateTimeParser != "" {
		out["datetime_parser"] = dr.DateTimeParser
	}
	return util.MarshalJSON(out)
}
// numericRange is a named numeric bucket of a facet request. Min and Max are
// pointers so that either bound may be left unset; FacetRequest.Validate
// rejects a range in which both are nil.
type numericRange struct {
Name string `json:"name,omitempty"`
Min *float64 `json:"min,omitempty"`
Max *float64 `json:"max,omitempty"`
}
// A FacetRequest describes a facet or aggregation
// of the result document set you would like to be
// built.
//
// A request may carry numeric ranges or date ranges, but not both; Validate
// enforces this.
type FacetRequest struct {
Size int `json:"size"`
Field string `json:"field"`
// TermPrefix and TermPattern are optional filters for term facets,
// set through SetPrefixFilter and SetRegexFilter.
TermPrefix string `json:"term_prefix,omitempty"`
TermPattern string `json:"term_pattern,omitempty"`
NumericRanges []*numericRange `json:"numeric_ranges,omitempty"`
DateTimeRanges []*dateTimeRange `json:"date_ranges,omitempty"`
// Compiled regex pattern (cached during validation)
compiledPattern *regexp.Regexp
}
// NewFacetRequest returns a FacetRequest for the given field whose number
// of entries is limited to size. No filters or ranges are set.
func NewFacetRequest(field string, size int) *FacetRequest {
	fr := new(FacetRequest)
	fr.Field = field
	fr.Size = size
	return fr
}
// SetPrefixFilter sets the prefix filter for term facets by storing prefix
// in TermPrefix.
func (fr *FacetRequest) SetPrefixFilter(prefix string) {
fr.TermPrefix = prefix
}
// SetRegexFilter sets the regex pattern filter for term facets by storing
// pattern in TermPattern. The pattern is not compiled here; Validate
// compiles it and reports an invalid pattern.
func (fr *FacetRequest) SetRegexFilter(pattern string) {
fr.TermPattern = pattern
}
// Validate checks that the facet request is well formed and caches the
// compiled term pattern.
//
// It returns an error when:
//   - TermPattern is not a valid regular expression,
//   - both numeric ranges and date ranges are present,
//   - a range entry is nil (for example a JSON null inside the array),
//   - two ranges of the same kind share a name,
//   - a numeric range has neither min nor max,
//   - a date range that uses the default parser cannot be parsed or has
//     neither start nor end.
//
// Date ranges that name their own DateTimeParser are not parsed here; that
// check is deferred to query time.
func (fr *FacetRequest) Validate() error {
	// Always drop a previously cached regex first so it can never outlive
	// the pattern it was compiled from (e.g. after the pattern was cleared).
	fr.compiledPattern = nil
	if fr.TermPattern != "" {
		compiled, err := regexp.Compile(fr.TermPattern)
		if err != nil {
			return fmt.Errorf("invalid term pattern: %v", err)
		}
		fr.compiledPattern = compiled
	}

	nrCount := len(fr.NumericRanges)
	drCount := len(fr.DateTimeRanges)
	if nrCount > 0 && drCount > 0 {
		return fmt.Errorf("facet can only contain numeric ranges or date ranges, not both")
	}

	if nrCount > 0 {
		nrNames := make(map[string]struct{}, nrCount)
		for _, nr := range fr.NumericRanges {
			if nr == nil {
				// reachable via JSON such as "numeric_ranges": [null]
				return fmt.Errorf("numeric range cannot be nil")
			}
			if _, ok := nrNames[nr.Name]; ok {
				return fmt.Errorf("numeric ranges contains duplicate name '%s'", nr.Name)
			}
			nrNames[nr.Name] = struct{}{}
			if nr.Min == nil && nr.Max == nil {
				return fmt.Errorf("numeric range query must specify either min, max or both for range name '%s'", nr.Name)
			}
		}
		return nil
	}

	dateTimeParser, err := cache.DateTimeParserNamed(defaultDateTimeParser)
	if err != nil {
		return err
	}
	drNames := make(map[string]struct{}, drCount)
	for _, dr := range fr.DateTimeRanges {
		if dr == nil {
			// reachable via JSON such as "date_ranges": [null]
			return fmt.Errorf("date range cannot be nil")
		}
		if _, ok := drNames[dr.Name]; ok {
			return fmt.Errorf("date ranges contains duplicate name '%s'", dr.Name)
		}
		drNames[dr.Name] = struct{}{}
		if dr.DateTimeParser != "" {
			// The default parser is overridden for this range, so its
			// dates cannot be parsed here; this validation is performed
			// at query time instead.
			continue
		}
		start, end, err := dr.ParseDates(dateTimeParser)
		if err != nil {
			return fmt.Errorf("ParseDates err: %v, using date time parser named %s", err, defaultDateTimeParser)
		}
		if start.IsZero() && end.IsZero() {
			return fmt.Errorf("date range query must specify either start, end or both for range name '%s'", dr.Name)
		}
	}
	return nil
}
// AddDateTimeRange appends a named bucket, bounded by the given start and
// end times, to a facet on a field containing date values. Documents with a
// date value falling into this range are tabulated as part of the bucket.
func (fr *FacetRequest) AddDateTimeRange(name string, start, end time.Time) {
	bucket := &dateTimeRange{
		Name:  name,
		Start: start,
		End:   end,
	}
	// append allocates the slice on first use
	fr.DateTimeRanges = append(fr.DateTimeRanges, bucket)
}
// AddDateTimeRangeString appends a named bucket whose bounds are given as
// date strings to a facet on a field containing date values. No parser is
// named for the bucket, so defaultDateTimeParser is used to parse the
// strings.
func (fr *FacetRequest) AddDateTimeRangeString(name string, start, end *string) {
	bucket := &dateTimeRange{
		Name:        name,
		startString: start,
		endString:   end,
	}
	// append allocates the slice on first use
	fr.DateTimeRanges = append(fr.DateTimeRanges, bucket)
}
// AddDateTimeRangeStringWithParser appends a named bucket whose bounds are
// given as date strings to a facet on a field containing date values. The
// strings are parsed with the specified parser, provided the parser is
// registered in the index mapping.
func (fr *FacetRequest) AddDateTimeRangeStringWithParser(name string, start, end *string, parser string) {
	bucket := &dateTimeRange{
		Name:           name,
		startString:    start,
		endString:      end,
		DateTimeParser: parser,
	}
	// append allocates the slice on first use
	fr.DateTimeRanges = append(fr.DateTimeRanges, bucket)
}
// AddNumericRange appends a named bucket to a facet on a field containing
// numeric values. Documents with a numeric value falling into this range
// are tabulated as part of the bucket. Either bound may be nil, but
// Validate rejects a range in which both are.
func (fr *FacetRequest) AddNumericRange(name string, min, max *float64) {
	bucket := &numericRange{
		Name: name,
		Min:  min,
		Max:  max,
	}
	// append allocates the slice on first use
	fr.NumericRanges = append(fr.NumericRanges, bucket)
}
// FacetsRequest groups together all the
// FacetRequest objects for a single query,
// keyed by facet name.
type FacetsRequest map[string]*FacetRequest

// Validate validates every facet request in the group and returns the first
// error encountered. A nil entry (which JSON such as `"facets": {"x": null}`
// produces) is reported as an error instead of being dereferenced. A nil or
// empty FacetsRequest is valid.
func (fr FacetsRequest) Validate() error {
	for name, facet := range fr {
		if facet == nil {
			return fmt.Errorf("facet request '%s' cannot be nil", name)
		}
		if err := facet.Validate(); err != nil {
			return err
		}
	}
	return nil
}
// HighlightRequest describes how field matches
// should be highlighted.
type HighlightRequest struct {
	// Style is nil unless an alternate style was requested.
	Style *string `json:"style"`
	// Fields collects the field names added through AddField.
	Fields []string `json:"fields"`
}

// NewHighlight creates a default HighlightRequest: no style and no fields.
func NewHighlight() *HighlightRequest {
	return new(HighlightRequest)
}

// NewHighlightWithStyle creates a HighlightRequest that uses the given
// alternate style.
func NewHighlightWithStyle(style string) *HighlightRequest {
	hr := new(HighlightRequest)
	hr.Style = &style
	return hr
}

// AddField appends field to the list of fields of this request.
func (h *HighlightRequest) AddField(field string) {
	// append allocates the slice on first use
	h.Fields = append(h.Fields, field)
}
// Validate checks the search request for consistency. In order, it
// validates: the query (when it implements query.ValidatableQuery), the
// search after / search before settings and their values, the score fusion
// constraints, the kNN section and finally the facets. The first failure is
// returned.
func (r *SearchRequest) Validate() error {
	if vq, ok := r.Query.(query.ValidatableQuery); ok {
		if err := vq.Validate(); err != nil {
			return err
		}
	}

	hasAfter := r.SearchAfter != nil
	hasBefore := r.SearchBefore != nil
	if hasAfter && hasBefore {
		return fmt.Errorf("cannot use search after and search before together")
	}

	if hasAfter {
		if r.From != 0 {
			return fmt.Errorf("cannot use search after with from !=0")
		}
		if len(r.SearchAfter) != len(r.Sort) {
			return fmt.Errorf("search after must have same size as sort order")
		}
	}
	if hasBefore {
		if r.From != 0 {
			return fmt.Errorf("cannot use search before with from !=0")
		}
		if len(r.SearchBefore) != len(r.Sort) {
			return fmt.Errorf("search before must have same size as sort order")
		}
	}

	// the length checks above guarantee one sort entry per pagination key
	if err := r.validatePagination(); err != nil {
		return err
	}

	if IsScoreFusionRequested(r) {
		if hasAfter || hasBefore {
			return fmt.Errorf("cannot use search after or search before with score fusion")
		}
		if r.Sort != nil && !reflect.DeepEqual(r.Sort, AllowedFusionSort) {
			return fmt.Errorf("sort must be empty or descending order of score for score fusion")
		}
	}

	if err := validateKNN(r); err != nil {
		return err
	}
	return r.Facets.Validate()
}
// validatePagination checks that every SearchAfter/SearchBefore key can be
// parsed according to the sort entry at the same position: geo distance and
// numeric field sorts need a float, date field sorts need an RFC3339Nano
// timestamp. Keys for other sort types are not checked. It indexes r.Sort by
// key position, so callers must have verified that the lengths match.
func (r *SearchRequest) validatePagination() error {
	var (
		keys  []string
		label string
	)
	switch {
	case r.SearchAfter != nil:
		keys, label = r.SearchAfter, "search after"
	case r.SearchBefore != nil:
		keys, label = r.SearchBefore, "search before"
	default:
		return nil
	}

	for i, key := range keys {
		var (
			field    string
			parseErr error
		)
		switch s := r.Sort[i].(type) {
		case *search.SortGeoDistance:
			field = s.Field
			_, parseErr = strconv.ParseFloat(key, 64)
		case *search.SortField:
			field = s.Field
			switch s.Type {
			case search.SortFieldAsNumber:
				_, parseErr = strconv.ParseFloat(key, 64)
			case search.SortFieldAsDate:
				_, parseErr = time.Parse(time.RFC3339Nano, key)
			}
		}
		if parseErr != nil {
			return fmt.Errorf("invalid %s value for sort field '%s': '%s'. %s", label, field, key, parseErr)
		}
	}
	return nil
}
// AddFacet registers f under facetName on this SearchRequest, creating the
// facets map on first use. An existing facet with the same name is replaced.
func (r *SearchRequest) AddFacet(facetName string, f *FacetRequest) {
	if r.Facets == nil {
		r.Facets = FacetsRequest{facetName: f}
		return
	}
	r.Facets[facetName] = f
}
// SortBy changes the request to use the requested sort order.
// This form uses the simplified syntax with an array of strings:
// each string can either be a field name
// or the magic value _id or _score, which refer to the doc id and search score.
// Any of these values can optionally be prefixed with - to reverse the order.
func (r *SearchRequest) SortBy(order []string) {
	r.Sort = search.ParseSortOrderStrings(order)
}
// SortByCustom changes the request to use the requested sort order.
// The given order is stored as is, without copying.
func (r *SearchRequest) SortByCustom(order search.SortOrder) {
r.Sort = order
}
// SetSearchAfter sets the request to skip over hits with a sort
// value less than the provided sort after key.
// Validate requires the key to have one entry per sort order entry and
// rejects it in combination with a search before key or a non-zero From.
func (r *SearchRequest) SetSearchAfter(after []string) {
r.SearchAfter = after
}
// SetSearchBefore sets the request to skip over hits with a sort
// value greater than the provided sort before key.
// Validate requires the key to have one entry per sort order entry and
// rejects it in combination with a search after key or a non-zero From.
func (r *SearchRequest) SetSearchBefore(before []string) {
r.SearchBefore = before
}
// AddParams adds a RequestParams field to the search request
func (r *SearchRequest) AddParams(params RequestParams) {
r.Params = ¶ms
}
// NewSearchRequest creates a new SearchRequest
// for the Query, using default values for all
// other search parameters: a size of 10, starting
// from 0, without explanation.
func NewSearchRequest(q query.Query) *SearchRequest {
return NewSearchRequestOptions(q, 10, 0, false)
}
// NewSearchRequestOptions creates a new SearchRequest for the Query with
// the requested size, from and explanation search parameters.
// By default results are ordered by score, descending.
func NewSearchRequestOptions(q query.Query, size, from int, explain bool) *SearchRequest {
	req := new(SearchRequest)
	req.Query = q
	req.Size = size
	req.From = from
	req.Explain = explain
	req.Sort = search.SortOrder{&search.SortScore{Desc: true}}
	return req
}
// IndexErrMap tracks errors with the name of the index where it occurred
type IndexErrMap map[string]error

// MarshalJSON serializes the map as a JSON object of index name to error
// message, since error values themselves have no JSON form.
func (iem IndexErrMap) MarshalJSON() ([]byte, error) {
	messages := make(map[string]string, len(iem))
	for indexName, indexErr := range iem {
		messages[indexName] = indexErr.Error()
	}
	return util.MarshalJSON(messages)
}
// UnmarshalJSON decodes a JSON object of index name to error message and
// adds each entry to the map as a new error carrying that message.
//
// The method has a value receiver and therefore cannot allocate the map.
// Decoding entries into a nil IndexErrMap is reported as an error rather
// than panicking on the write to a nil map.
func (iem IndexErrMap) UnmarshalJSON(data []byte) error {
	var tmp map[string]string
	if err := util.UnmarshalJSON(data, &tmp); err != nil {
		return err
	}
	if len(tmp) == 0 {
		return nil
	}
	if iem == nil {
		return fmt.Errorf("cannot unmarshal index errors into a nil IndexErrMap")
	}
	for k, v := range tmp {
		iem[k] = fmt.Errorf("%s", v)
	}
	return nil
}
// SearchStatus is a section in the SearchResult reporting how many
// underlying indexes were queried, how many were successful/failed
// and a map of any errors that were encountered, keyed by index name.
type SearchStatus struct {
Total int `json:"total"`
Failed int `json:"failed"`
Successful int `json:"successful"`
Errors IndexErrMap `json:"errors,omitempty"`
}
// Merge folds other into ss during a MultiSearch: the counters are summed
// and the errors of other are copied in, overwriting entries that share an
// index name. The error map of ss is created only when other has errors.
func (ss *SearchStatus) Merge(other *SearchStatus) {
	ss.Total += other.Total
	ss.Failed += other.Failed
	ss.Successful += other.Successful

	if len(other.Errors) == 0 {
		return
	}
	if ss.Errors == nil {
		ss.Errors = make(IndexErrMap)
	}
	for indexName, indexErr := range other.Errors {
		ss.Errors[indexName] = indexErr
	}
}
// A SearchResult describes the results of executing
// a SearchRequest.
//
// Status - Whether the search was executed on the underlying indexes successfully
// or failed, and the corresponding errors.
// Request - The SearchRequest that was executed.
// Hits - The list of documents that matched the query and their corresponding
// scores, score explanation, location info and so on.
// Total - The total number of documents that matched the query.
// Cost - Indicates how expensive the query was with respect to bytes read
// from the mapped index files.
// MaxScore - The maximum score seen across all document hits seen for this query.
// Took - The time taken to execute the search.
// Facets - The facet results for the search.
type SearchResult struct {
Status *SearchStatus `json:"status"`
Request *SearchRequest `json:"request,omitempty"`
Hits search.DocumentMatchCollection `json:"hits"`
Total uint64 `json:"total_hits"`
Cost uint64 `json:"cost"`
MaxScore float64 `json:"max_score"`
Took time.Duration `json:"took"`
Facets search.FacetResults `json:"facets"`
// special fields that are applicable only for search
// results that are obtained from a presearch
SynonymResult search.FieldTermSynonymMap `json:"synonym_result,omitempty"`
// The following fields are applicable to BM25 preSearch
BM25Stats *search.BM25Stats `json:"bm25_stats,omitempty"`
}
// Size estimates the memory footprint of the result in bytes: the static
// sizes of the SearchResult and SearchStatus structs plus one pointer, the
// size of every non-nil hit, and for every facet its size plus the string
// overhead and length of its name.
func (sr *SearchResult) Size() int {
	total := reflectStaticSizeSearchResult + size.SizeOfPtr + reflectStaticSizeSearchStatus

	for _, hit := range sr.Hits {
		if hit == nil {
			continue
		}
		total += hit.Size()
	}

	for name, facet := range sr.Facets {
		total += size.SizeOfString + len(name) + facet.Size()
	}
	return total
}
// String renders the result as human readable text: a summary line, the
// hits (when the request is unknown or asked for a positive number of
// hits) and, if present, the facets with their term, numeric range and
// date range counts. Facets are written in map iteration order.
func (sr *SearchResult) String() string {
	rv := new(strings.Builder)

	switch {
	case sr.Total == 0:
		rv.WriteString("No matches\n")
	case sr.Request == nil:
		// no request available: number the hits starting at 1
		fmt.Fprintf(rv, "%d matches, took %s\n", sr.Total, sr.Took)
		for i, hit := range sr.Hits {
			rv = formatHit(rv, hit, i+1)
		}
	case sr.Request.Size > 0:
		// number the hits relative to the requested offset
		first := sr.Request.From + 1
		last := sr.Request.From + len(sr.Hits)
		fmt.Fprintf(rv, "%d matches, showing %d through %d, took %s\n", sr.Total, first, last, sr.Took)
		for i, hit := range sr.Hits {
			rv = formatHit(rv, hit, first+i)
		}
	default:
		// the request asked for no hits: summary only
		fmt.Fprintf(rv, "%d matches, took %s\n", sr.Total, sr.Took)
	}

	if len(sr.Facets) > 0 {
		rv.WriteString("Facets:\n")
		for name, facet := range sr.Facets {
			fmt.Fprintf(rv, "%s(%d)\n", name, facet.Total)
			for _, term := range facet.Terms.Terms() {
				fmt.Fprintf(rv, "\t%s(%d)\n", term.Term, term.Count)
			}
			for _, nr := range facet.NumericRanges {
				fmt.Fprintf(rv, "\t%s(%d)\n", nr.Name, nr.Count)
			}
			for _, dr := range facet.DateRanges {
				fmt.Fprintf(rv, "\t%s(%d)\n", dr.Name, dr.Count)
			}
			if facet.Other != 0 {
				fmt.Fprintf(rv, "\tOther(%d)\n", facet.Other)
			}
		}
	}
	return rv.String()
}
// formatHit writes a single hit to rv for SearchResult.String and returns
// rv. The output is: a header line with the hit number, ID and score; the
// highlighted fragments per field; the stored fields that have no
// fragments; the nested documents stored under NestedDocumentKey; and the
// decoded sort values, if any.
func formatHit(rv *strings.Builder, hit *search.DocumentMatch, hitNumber int) *strings.Builder {
	fmt.Fprintf(rv, "%5d. %s (%f)\n", hitNumber, hit.ID, hit.Score)

	for field, fragments := range hit.Fragments {
		fmt.Fprintf(rv, "\t%s\n", field)
		for _, fragment := range fragments {
			fmt.Fprintf(rv, "\t\t%s\n", fragment)
		}
	}

	for name, value := range hit.Fields {
		// nested documents get their own section below
		if name == NestedDocumentKey {
			continue
		}
		// fields with fragments were already printed above
		if _, hasFragments := hit.Fragments[name]; hasFragments {
			continue
		}
		fmt.Fprintf(rv, "\t%s\n", name)
		fmt.Fprintf(rv, "\t\t%v\n", value)
	}

	// nested documents; a missing key or a value of another type is skipped
	if list, ok := hit.Fields[NestedDocumentKey].([]*search.NestedDocumentMatch); ok {
		fmt.Fprintf(rv, "\t%s (%d nested documents)\n", NestedDocumentKey, len(list))
		for idx, nested := range list {
			fmt.Fprintf(rv, "\t\tNested #%d:\n", idx+1)
			for field, fragments := range nested.Fragments {
				fmt.Fprintf(rv, "\t\t\t%s\n", field)
				for _, fragment := range fragments {
					fmt.Fprintf(rv, "\t\t\t\t%s\n", fragment)
				}
			}
			for field, value := range nested.Fields {
				if _, hasFragments := nested.Fragments[field]; hasFragments {
					continue
				}
				fmt.Fprintf(rv, "\t\t\t%s\n", field)
				fmt.Fprintf(rv, "\t\t\t\t%v\n", value)
			}
		}
	}

	if len(hit.DecodedSort) > 0 {
		rv.WriteString("\t_sort: [")
		for pos, sortValue := range hit.DecodedSort {
			if pos > 0 {
				rv.WriteString(", ")
			}
			fmt.Fprintf(rv, "%v", sortValue)
		}
		rv.WriteString("]\n")
	}
	return rv
}
// Merge folds other into sr during a MultiSearch: statuses are merged, hits
// are concatenated, totals and costs are summed and the larger max score is
// kept. When sr has no facets yet and other does, the facets of other are
// adopted as is; otherwise they are merged into those of sr.
func (sr *SearchResult) Merge(other *SearchResult) {
	sr.Status.Merge(other.Status)
	sr.Hits = append(sr.Hits, other.Hits...)
	sr.Total += other.Total
	sr.Cost += other.Cost
	if sr.MaxScore < other.MaxScore {
		sr.MaxScore = other.MaxScore
	}

	if sr.Facets != nil || len(other.Facets) == 0 {
		sr.Facets.Merge(other.Facets)
		return
	}
	sr.Facets = other.Facets
}
// MemoryNeededForSearchResult is an exported helper function to determine
// the RAM needed to accommodate the results for a given search request.
// The estimate is the sum of: an empty SearchResult, one DocumentMatch per
// expected hit (Size+From, capped at collector.PreAllocSizeSkipCap), one
// FacetResult per requested facet and, when fields or highlighting are
// requested, one Document per returned hit (Size, with the same cap).
// A nil request needs no memory.
func MemoryNeededForSearchResult(req *SearchRequest) uint64 {
	if req == nil {
		return 0
	}

	hitCount := req.Size + req.From
	if hitCount > collector.PreAllocSizeSkipCap {
		hitCount = collector.PreAllocSizeSkipCap
	}

	// overhead from the SearchResult structure and from the hits
	var (
		sr SearchResult
		dm search.DocumentMatch
	)
	estimate := sr.Size() + hitCount*dm.Size()

	// overhead from facet results
	if req.Facets != nil {
		var fr search.FacetResult
		estimate += len(req.Facets) * fr.Size()
	}

	// overhead from fields, highlighting
	if len(req.Fields) > 0 || req.Highlight != nil {
		docCount := req.Size
		if docCount > collector.PreAllocSizeSkipCap {
			docCount = collector.PreAllocSizeSkipCap
		}
		var d document.Document
		estimate += docCount * d.Size()
	}

	return uint64(estimate)
}
// SetSortFunc sets the sort implementation to use when sorting hits.
//
// SearchRequests can specify a custom sort implementation to meet
// their needs. For instance, by specifying a parallel sort
// that uses all available cores.
//
// Passing nil restores the default reported by SortFunc.
func (r *SearchRequest) SetSortFunc(s func(sort.Interface)) {
r.sortFunc = s
}
// SortFunc returns the sort implementation to use when sorting hits: the
// one installed with SetSortFunc, or sort.Sort when none was set.
func (r *SearchRequest) SortFunc() func(data sort.Interface) {
	if r.sortFunc == nil {
		return sort.Sort
	}
	return r.sortFunc
}
// isMatchNoneQuery reports whether q is a *query.MatchNoneQuery.
// Only the dynamic type is inspected, so a nil q yields false.
func isMatchNoneQuery(q query.Query) bool {
_, ok := q.(*query.MatchNoneQuery)
return ok
}
// isMatchAllQuery reports whether q is a *query.MatchAllQuery.
// Only the dynamic type is inspected, so a nil q yields false.
func isMatchAllQuery(q query.Query) bool {
_, ok := q.(*query.MatchAllQuery)
return ok
}
// IsScoreFusionRequested reports whether the request asks for hybrid search
// with score fusion, i.e. whether its Score is ScoreRRF or ScoreRSF.
func IsScoreFusionRequested(req *SearchRequest) bool {
	return req.Score == ScoreRRF || req.Score == ScoreRSF
}
// RequestParams holds additional parameters of the search request.
// Currently only being used for score fusion parameters.
//
// NewDefaultParams supplies the defaults: DefaultScoreRankConstant for the
// rank constant and from+size for the window size. Validate requires the
// window size to be at least 1 and not smaller than the request size.
type RequestParams struct {
ScoreRankConstant int `json:"score_rank_constant,omitempty"`
ScoreWindowSize int `json:"score_window_size,omitempty"`
}
// NewDefaultParams returns the default score fusion parameters for a
// request with the given paging: the rank constant is
// DefaultScoreRankConstant and the window size is from + size.
func NewDefaultParams(from, size int) *RequestParams {
	params := new(RequestParams)
	params.ScoreRankConstant = DefaultScoreRankConstant
	params.ScoreWindowSize = from + size
	return params
}
// UnmarshalJSON decodes the parameters present in input and leaves the
// others at their current value, so defaults already stored in p survive a
// partial JSON object.
func (p *RequestParams) UnmarshalJSON(input []byte) error {
	var decoded struct {
		ScoreRankConstant *int `json:"score_rank_constant,omitempty"`
		ScoreWindowSize   *int `json:"score_window_size,omitempty"`
	}
	err := util.UnmarshalJSON(input, &decoded)
	if err != nil {
		return err
	}

	if rankConstant := decoded.ScoreRankConstant; rankConstant != nil {
		p.ScoreRankConstant = *rankConstant
	}
	if windowSize := decoded.ScoreWindowSize; windowSize != nil {
		p.ScoreWindowSize = *windowSize
	}
	return nil
}
// Validate checks the score window size against the request size: it must
// be at least 1 and must not be smaller than size.
func (p *RequestParams) Validate(size int) error {
	switch {
	case p.ScoreWindowSize < 1:
		return fmt.Errorf("score window size must be greater than 0")
	case p.ScoreWindowSize < size:
		return fmt.Errorf("score window size must be greater than or equal to Size (%d)", size)
	}
	return nil
}
// ParseParams builds the RequestParams for r. It starts from the defaults
// derived from the paging of r and overrides them with the values found in
// input. Empty input yields the defaults without validation; otherwise the
// decoded parameters are validated against r.Size before being returned.
func ParseParams(r *SearchRequest, input []byte) (*RequestParams, error) {
	params := NewDefaultParams(r.From, r.Size)
	if len(input) == 0 {
		return params, nil
	}

	if err := util.UnmarshalJSON(input, params); err != nil {
		return nil, err
	}
	if err := params.Validate(r.Size); err != nil {
		return nil, err
	}
	return params, nil
}
================================================
FILE: search_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package bleve
import (
"context"
"encoding/json"
"fmt"
"sort"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/collector"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
)
// supportForVectorSearch is true in this file, which is compiled only when
// the `vectors` build tag is set.
const supportForVectorSearch = true
// knnOperator names how multiple kNN clauses of a request are combined;
// validateKNN accepts "and", "or" and the empty string.
type knnOperator string
// BleveMaxK is the largest k that validateKNN accepts for a kNN request.
// Must be updated only at init
var BleveMaxK = int64(10000)
// SearchRequest holds the parameters of a search: the query, paging
// (Size/From), highlighting, stored fields, facets, sorting, search
// after/before keys, scoring mode and the kNN clauses.
// This definition is the one compiled with the `vectors` build tag.
type SearchRequest struct {
ClientContextID string `json:"client_context_id,omitempty"`
Query query.Query `json:"query"`
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
KNN []*KNNRequest `json:"knn"`
KNNOperator knnOperator `json:"knn_operator"`
// PreSearchData will be a map that will be used
// in the second phase of any 2-phase search, to provide additional
// context to the second phase. This is useful in the case of index
// aliases where the first phase will gather the PreSearchData from all
// the indexes in the alias, and the second phase will use that
// PreSearchData to perform the actual search.
// The currently accepted map configuration is:
//
// "_knn_pre_search_data_key": []*search.DocumentMatch
PreSearchData map[string]interface{} `json:"pre_search_data,omitempty"`
// Params carries the score fusion parameters; UnmarshalJSON fills it only
// when score fusion is requested.
Params *RequestParams `json:"params,omitempty"`
// sortFunc is the optional custom sort implementation, see SetSortFunc.
sortFunc func(sort.Interface)
}
// KNNRequest describes a single kNN clause of a SearchRequest: the vector
// field to search, the query vector and the number of neighbours K.
// Vector takes precedence over vectorBase64 in case both fields are given;
// validateKNN decodes VectorBase64 into Vector only when Vector is empty.
type KNNRequest struct {
Field string `json:"field"`
Vector []float32 `json:"vector"`
VectorBase64 string `json:"vector_base64"`
K int64 `json:"k"`
Boost *query.Boost `json:"boost,omitempty"`
// Search parameters for the field's vector index part of the segment.
// Value of it depends on the field's backing vector index implementation.
//
// For Faiss IVF index, supported search params are:
// - ivf_nprobe_pct : int // percentage of total clusters to search
// - ivf_max_codes_pct : float // percentage of total vectors to visit to do a query (across all clusters)
//
// Consult go-faiss to know all supported search params
Params json.RawMessage `json:"params"`
// Filter query to use with kNN pre-filtering.
// Supports pre-filtering with all existing types of query clauses.
FilterQuery query.Query `json:"filter,omitempty"`
}
// AddKNN appends an unfiltered kNN clause to the request: search field for
// the k nearest neighbours of vector, weighting the clause with boost.
func (r *SearchRequest) AddKNN(field string, vector []float32, k int64, boost float64) {
	// an unfiltered clause is a filtered one without a filter query
	r.AddKNNWithFilter(field, vector, k, boost, nil)
}
// AddKNNWithFilter appends a kNN clause to the request: search field for
// the k nearest neighbours of vector, weighting the clause with boost and
// using filterQuery for kNN pre-filtering.
func (r *SearchRequest) AddKNNWithFilter(field string, vector []float32, k int64,
	boost float64, filterQuery query.Query) {
	clauseBoost := query.Boost(boost)
	clause := &KNNRequest{
		Field:       field,
		Vector:      vector,
		K:           k,
		Boost:       &clauseBoost,
		FilterQuery: filterQuery,
	}
	r.KNN = append(r.KNN, clause)
}
// AddKNNOperator sets the operator that combines the kNN clauses of the
// request. The value is stored as given; validateKNN later rejects anything
// other than "and", "or" or the empty string.
func (r *SearchRequest) AddKNNOperator(operator knnOperator) {
r.KNNOperator = operator
}
// UnmarshalJSON deserializes a JSON representation of
// a SearchRequest.
//
// Defaults and normalization applied while decoding:
//   - a missing or negative size becomes 10, a negative from becomes 0,
//   - a missing sort becomes descending by score,
//   - a missing knn_operator becomes "or",
//   - for score fusion requests, missing params are replaced by the
//     defaults and present params are parsed and validated.
//
// A null entry in the "knn" array is rejected with an error.
//
// NOTE(review): "client_context_id" is not among the decoded fields, so
// ClientContextID is not restored by this method - confirm whether that is
// intentional.
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
	type tempKNNReq struct {
		Field        string          `json:"field"`
		Vector       []float32       `json:"vector"`
		VectorBase64 string          `json:"vector_base64"`
		K            int64           `json:"k"`
		Boost        *query.Boost    `json:"boost,omitempty"`
		Params       json.RawMessage `json:"params"`
		FilterQuery  json.RawMessage `json:"filter,omitempty"`
	}

	var temp struct {
		Q                json.RawMessage   `json:"query"`
		Size             *int              `json:"size"`
		From             int               `json:"from"`
		Highlight        *HighlightRequest `json:"highlight"`
		Fields           []string          `json:"fields"`
		Facets           FacetsRequest     `json:"facets"`
		Explain          bool              `json:"explain"`
		Sort             []json.RawMessage `json:"sort"`
		IncludeLocations bool              `json:"includeLocations"`
		Score            string            `json:"score"`
		SearchAfter      []string          `json:"search_after"`
		SearchBefore     []string          `json:"search_before"`
		KNN              []*tempKNNReq     `json:"knn"`
		KNNOperator      knnOperator       `json:"knn_operator"`
		PreSearchData    json.RawMessage   `json:"pre_search_data"`
		Params           json.RawMessage   `json:"params"`
	}

	err := json.Unmarshal(input, &temp)
	if err != nil {
		return err
	}

	if temp.Size == nil {
		r.Size = 10
	} else {
		r.Size = *temp.Size
	}
	if temp.Sort == nil {
		r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
	} else {
		r.Sort, err = search.ParseSortOrderJSON(temp.Sort)
		if err != nil {
			return err
		}
	}
	r.From = temp.From
	r.Explain = temp.Explain
	r.Highlight = temp.Highlight
	r.Fields = temp.Fields
	r.Facets = temp.Facets
	r.IncludeLocations = temp.IncludeLocations
	r.Score = temp.Score
	r.SearchAfter = temp.SearchAfter
	r.SearchBefore = temp.SearchBefore
	r.Query, err = query.ParseQuery(temp.Q)
	if err != nil {
		return err
	}

	if r.Size < 0 {
		r.Size = 10
	}
	if r.From < 0 {
		r.From = 0
	}

	if IsScoreFusionRequested(r) {
		if temp.Params == nil {
			// If params is not present and the request requires
			// rescoring, assign default values.
			r.Params = NewDefaultParams(r.From, r.Size)
		} else {
			// If it is a request that requires rescoring, parse the
			// rescoring parameters.
			params, err := ParseParams(r, temp.Params)
			if err != nil {
				return err
			}
			r.Params = params
		}
	}

	r.KNN = make([]*KNNRequest, len(temp.KNN))
	for i, knnReq := range temp.KNN {
		if knnReq == nil {
			// a JSON null inside the "knn" array decodes to a nil
			// entry; reject it instead of dereferencing it
			return fmt.Errorf("knn query cannot be nil")
		}
		knn := &KNNRequest{
			Field:        knnReq.Field,
			Vector:       knnReq.Vector,
			VectorBase64: knnReq.VectorBase64,
			K:            knnReq.K,
			Boost:        knnReq.Boost,
			Params:       knnReq.Params,
		}
		r.KNN[i] = knn
		// Without a filter the FilterQuery stays nil, to avoid
		// ParseQuery() setting it to a match none.
		if len(knnReq.FilterQuery) > 0 {
			knn.FilterQuery, err = query.ParseQuery(knnReq.FilterQuery)
			if err != nil {
				return err
			}
		}
	}
	r.KNNOperator = temp.KNNOperator
	if r.KNNOperator == "" {
		r.KNNOperator = knnOperatorOr
	}

	if temp.PreSearchData != nil {
		r.PreSearchData, err = query.ParsePreSearchData(temp.PreSearchData)
		if err != nil {
			return err
		}
	}

	return nil
}
// -----------------------------------------------------------------------------
// copySearchRequest derives a new request from req that fetches the first
// Size+From hits starting at offset 0 and carries the given preSearchData.
// The sort order is copied with Sort.Copy; all other listed fields are
// shared with req. ClientContextID and sortFunc are left at their zero
// values.
func copySearchRequest(req *SearchRequest, preSearchData map[string]interface{}) *SearchRequest {
	return &SearchRequest{
		Query:            req.Query,
		Size:             req.Size + req.From,
		From:             0,
		Highlight:        req.Highlight,
		Fields:           req.Fields,
		Facets:           req.Facets,
		Explain:          req.Explain,
		Sort:             req.Sort.Copy(),
		IncludeLocations: req.IncludeLocations,
		Score:            req.Score,
		SearchAfter:      req.SearchAfter,
		SearchBefore:     req.SearchBefore,
		KNN:              req.KNN,
		KNNOperator:      req.KNNOperator,
		PreSearchData:    preSearchData,
		Params:           req.Params,
	}
}
// The two named kNN operators accepted by validateKNN. knnOperatorOr is the
// value UnmarshalJSON applies when the request does not specify one.
var (
knnOperatorAnd = knnOperator("and")
knnOperatorOr = knnOperator("or")
)
// createKNNQuery builds the query that executes all kNN clauses of req.
//
// knnFilterResults maps the position of a clause in req.KNN to the result
// of its pre-filter: no entry means the clause is run unfiltered, a non-nil
// selector restricts the clause to the eligible documents, and a nil
// selector means the filter matched nothing.
//
// It returns a disjunction over one sub query per clause (with score
// breakdown enabled), the k of every clause in request order and the sum of
// those k values. A clause whose filter matched nothing is represented by a
// match_none query with k 0. For a request without kNN clauses all results
// are zero values. The kNN section is validated first and a validation
// failure is returned as the error.
func createKNNQuery(req *SearchRequest, knnFilterResults map[int]index.EligibleDocumentSelector) (
	query.Query, []int64, int64, error) {
	if !requestHasKNN(req) {
		return nil, nil, 0, nil
	}

	// first perform validation
	if err := validateKNN(req); err != nil {
		return nil, nil, 0, err
	}

	var (
		subQueries []query.Query
		sumOfK     int64
	)
	kArray := make([]int64, 0, len(req.KNN))

	for i, knn := range req.KNN {
		selector, filtered := knnFilterResults[i]
		if filtered && selector == nil {
			// The kNN query is filtered but has no eligible filter hits,
			// so do not run it: add a match_none query instead, which
			// ensures that the score breakdown is set to 0 for this kNN
			// query.
			subQueries = append(subQueries, NewMatchNoneQuery())
			kArray = append(kArray, 0)
			continue
		}

		knnQuery := query.NewKNNQuery(knn.Vector)
		knnQuery.SetField(knn.Field)
		knnQuery.SetK(knn.K)
		knnQuery.SetBoost(knn.Boost.Value())
		knnQuery.SetParams(knn.Params)
		if filtered {
			knnQuery.SetEligibleSelector(selector)
		}

		subQueries = append(subQueries, knnQuery)
		kArray = append(kArray, knn.K)
		sumOfK += knn.K
	}

	rv := query.NewDisjunctionQuery(subQueries)
	rv.RetrieveScoreBreakdown(true)
	return rv, kArray, sumOfK, nil
}
// validateKNN checks the kNN section of req. Every clause must be non-nil,
// have a positive k no larger than BleveMaxK, a non-empty vector, a
// non-empty field and, when its filter query is validatable, a valid filter
// query. The knn operator must be "and", "or" or empty.
//
// As a side effect, a clause that has no vector but a vector_base64 value
// gets its Vector filled in with the decoded value.
func validateKNN(req *SearchRequest) error {
	for _, knn := range req.KNN {
		if knn == nil {
			return fmt.Errorf("knn query cannot be nil")
		}

		// consider vector_base64 only if vector is not provided
		if len(knn.Vector) == 0 && knn.VectorBase64 != "" {
			decoded, err := document.DecodeVector(knn.VectorBase64)
			if err != nil {
				return err
			}
			knn.Vector = decoded
		}

		if knn.K <= 0 || len(knn.Vector) == 0 {
			return fmt.Errorf("k must be greater than 0 and vector must be non-empty")
		}
		if knn.K > BleveMaxK {
			return fmt.Errorf("k must be less than %d", BleveMaxK)
		}

		// since the DefaultField is not applicable for knn,
		// the field must be specified.
		if knn.Field == "" {
			return fmt.Errorf("knn query field must be non-empty")
		}

		if filter, ok := knn.FilterQuery.(query.ValidatableQuery); ok {
			if err := filter.Validate(); err != nil {
				return fmt.Errorf("knn filter query is invalid: %v", err)
			}
		}
	}

	if op := req.KNNOperator; op != knnOperatorAnd && op != knnOperatorOr && op != "" {
		return fmt.Errorf("knn_operator must be either 'and' / 'or'")
	}
	return nil
}
// addSortAndFieldsToKNNHits completes the given kNN hits so that they look
// like regular hits of req. For every hit it feeds the doc values of the
// fields required by the sort order (if any) to the sort, computes the sort
// value, loads and highlights the requested fields and sets the index name
// to name. It stops at the first error.
func addSortAndFieldsToKNNHits(req *SearchRequest, knnHits []*search.DocumentMatch, reader index.IndexReader, name string) (err error) {
	sortFields := req.Sort.RequiredFields()
	needDocValues := len(sortFields) > 0

	var (
		dvReader index.DocValueReader
		visitor  index.DocValueVisitor
	)
	if needDocValues {
		if dvReader, err = reader.DocValueReader(sortFields); err != nil {
			return err
		}
		visitor = func(field string, term []byte) {
			req.Sort.UpdateVisitor(field, term)
		}
	}

	for _, hit := range knnHits {
		if needDocValues {
			if err = dvReader.VisitDocValues(hit.IndexInternalID, visitor); err != nil {
				return err
			}
		}
		req.Sort.Value(hit)
		if err, _ = LoadAndHighlightAllFields(hit, req, "", reader, nil); err != nil {
			return err
		}
		hit.Index = name
	}
	return nil
}
// runKnnCollector runs the KNN part of req against reader and returns the
// collected KNN hits.
//
// The work happens in three steps:
//  1. every KNN request that carries a real filter query has that filter
//     executed, and the eligible documents are recorded per KNN request;
//  2. a KNN query is built from the request plus the filter results and is
//     collected with a KNN collector;
//  3. unless preSearch is set, the hits are finalized via finalizeKNNResults;
//     in both cases sort values, fields and the index name are then attached.
//
// Any searcher opened here is closed before returning. An error from closing
// the KNN searcher is reported only when no earlier error occurred.
func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) (knnHits []*search.DocumentMatch, err error) {
	// Maps the position of a KNN query in the request to its pre-filter result:
	//   - If the KNN query is **not filtered** (no filter query, or a match_all
	//     filter), no entry is added for it.
	//   - If the filter query is match_none, an entry is added whose value is
	//     `nil`: no documents match the filter.
	//   - Otherwise the entry holds the eligible document selector produced by
	//     running the filter query.
	// NOTE(review): how createKNNQuery tells "no entry" apart from "nil entry"
	// is not visible here -- confirm against createKNNQuery.
	knnFilterResults := make(map[int]index.EligibleDocumentSelector)
	for idx, knnReq := range req.KNN {
		filterQ := knnReq.FilterQuery
		if filterQ == nil || isMatchAllQuery(filterQ) {
			// When there is no filter query or the filter query is match_all,
			// all documents are eligible, and can be treated as unfiltered query.
			continue
		} else if isMatchNoneQuery(filterQ) {
			// If the filter query is match_none, then no documents match the filter query.
			knnFilterResults[idx] = nil
			continue
		}
		// Applies to all supported types of queries.
		// Note: err is shadowed inside this loop body; every failure path
		// below returns it explicitly.
		filterSearcher, err := filterQ.Searcher(ctx, reader, i.m, search.SearcherOptions{
			Score: "none", // just want eligible hits --> don't compute scores if not needed
		})
		if err != nil {
			return nil, err
		}
		// Using the index doc count to determine collector size since we do not
		// have an estimate of the number of eligible docs in the index yet.
		indexDocCount, err := i.DocCount()
		if err != nil {
			// close the searcher before returning; the close error is
			// dropped in favour of the error that is already being returned
			filterSearcher.Close()
			return nil, err
		}
		filterColl := collector.NewEligibleCollector(int(indexDocCount))
		err = filterColl.Collect(ctx, filterSearcher, reader)
		if err != nil {
			// close the searcher before returning; the close error is
			// dropped in favour of the error that is already being returned
			filterSearcher.Close()
			return nil, err
		}
		knnFilterResults[idx] = filterColl.EligibleSelector()
		// Close the filter searcher, as we are done with it.
		err = filterSearcher.Close()
		if err != nil {
			return nil, err
		}
	}
	// Add the filter hits when creating the kNN query
	KNNQuery, kArray, sumOfK, err := createKNNQuery(req, knnFilterResults)
	if err != nil {
		return nil, err
	}
	knnSearcher, err := KNNQuery.Searcher(ctx, reader, i.m, search.SearcherOptions{
		Explain: req.Explain,
	})
	if err != nil {
		return nil, err
	}
	// Always close the KNN searcher. Because err is a named result, a close
	// failure can still be surfaced here when nothing else went wrong.
	defer func() {
		if serr := knnSearcher.Close(); err == nil && serr != nil {
			err = serr
		}
	}()
	knnCollector := collector.NewKNNCollector(kArray, sumOfK)
	err = knnCollector.Collect(ctx, knnSearcher, reader)
	if err != nil {
		return nil, err
	}
	knnHits = knnCollector.Results()
	if !preSearch {
		knnHits = finalizeKNNResults(req, knnHits)
	}
	// at this point, irrespective of whether it is a preSearch or not,
	// the knn hits are populated with Sort and Fields.
	// it must be ensured downstream that the Sort and Fields are not
	// re-evaluated, for these hits.
	// also add the index names to the hits, so that when early
	// exit takes place after the first phase, the hits will have
	// a valid value for Index.
	err = addSortAndFieldsToKNNHits(req, knnHits, reader, i.name)
	if err != nil {
		return nil, err
	}
	return knnHits, nil
}
// setKnnHitsInCollector hands the KNN hits to the top-N collector together
// with the callback used to merge a KNN hit into a full-text hit.
// Nothing is registered when there are no KNN hits.
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, coll *collector.TopNCollector) {
	if len(knnHits) == 0 {
		return
	}
	merge := search.HybridMergeCallbackFn(func(ftsMatch *search.DocumentMatch, knnMatch *search.DocumentMatch) {
		// the KNN score is added on top of the FTS score
		ftsMatch.Score += knnMatch.Score
		// fold the KNN explanation into the FTS explanation
		ftsMatch.Expl.MergeWith(knnMatch.Expl)
	})
	coll.SetKNNHits(knnHits, merge)
}
// finalizeKNNResults turns raw KNN hits into their final form and returns the
// (possibly shortened) slice, reusing the backing array of knnHits.
//
// With the AND operator only hits whose score breakdown has one entry per KNN
// query in the request are kept. When score fusion is requested the hits are
// returned right after that filtering, with their score breakdown untouched.
// Otherwise each hit's score is rebuilt from its score breakdown (or left at
// 0 when req.Score is "none"), the explanation is rebuilt when req.Explain is
// set, and the score breakdown is cleared.
func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch) []*search.DocumentMatch {
	if req.KNNOperator == knnOperatorAnd {
		// in-place filter: drop hits that did not match every KNN query
		kept := knnHits[:0]
		for _, hit := range knnHits {
			if len(hit.ScoreBreakdown) == len(req.KNN) {
				kept = append(kept, hit)
			}
		}
		knnHits = kept
	}

	// score fusion needs the score breakdown, so stop before it is consumed
	if IsScoreFusionRequested(req) {
		return knnHits
	}

	scoringEnabled := req.Score != "none"
	for _, hit := range knnHits {
		hit.Score = 0.0
		if scoringEnabled {
			for _, partial := range hit.ScoreBreakdown {
				hit.Score += partial
			}
		}
		if req.Explain {
			// pick the child explanations addressed by the score breakdown
			children := make([]*search.Explanation, 0, len(hit.ScoreBreakdown))
			for k := range hit.ScoreBreakdown {
				children = append(children, hit.Expl.Children[k])
			}
			hit.Expl = &search.Explanation{
				Value:    hit.Score,
				Message:  "sum of:",
				Children: children,
			}
		}
		// the breakdown has served its purpose
		hit.ScoreBreakdown = nil
	}
	return knnHits
}
// validateAndDistributeKNNHits groups the merged KNN hits by the child index
// of this alias that each hit belongs to.
//
// Every hit carries a stack of index names in IndexNames; the top of the stack
// names the child of this alias the hit has to be routed to. For each hit the
// top entry is validated, popped and used as the key of the returned map, so
// that the preSearchData built for a child only contains that child's hits.
//
// ErrorTwoPhaseSearchInconsistency is returned when a hit has an empty stack
// (such a hit should have landed on a leaf index, not on an alias) or when the
// top of the stack is not one of indexes (for instance after a topology
// change). Hits visited before the failing one have already been popped.
func validateAndDistributeKNNHits(knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) {
	// names of all direct children of this alias
	members := make(map[string]struct{}, len(indexes))
	for _, child := range indexes {
		members[child.Name()] = struct{}{}
	}

	hitsByIndex := make(map[string][]*search.DocumentMatch)
	for _, hit := range knnHits {
		depth := len(hit.IndexNames)
		if depth == 0 {
			// nothing to pop: the hit cannot be routed anywhere
			return nil, ErrorTwoPhaseSearchInconsistency
		}
		owner := hit.IndexNames[depth-1]
		if _, ok := members[owner]; !ok {
			// the hit points at an index that is not part of this alias
			return nil, ErrorTwoPhaseSearchInconsistency
		}
		switch depth {
		case 1:
			// popping the only entry would leave an empty slice; use nil so
			// the final search results do not carry an indexNames field
			hit.IndexNames = nil
		default:
			hit.IndexNames = hit.IndexNames[:depth-1]
		}
		hitsByIndex[owner] = append(hitsByIndex[owner], hit)
	}
	return hitsByIndex, nil
}
// requestHasKNN reports whether the search request carries at least one KNN
// request.
func requestHasKNN(req *SearchRequest) bool {
	return len(req.KNN) != 0
}
// numKNNQueries returns how many KNN requests the search request carries.
func numKNNQueries(req *SearchRequest) int {
	count := len(req.KNN)
	return count
}
// isKNNrequestSatisfiedByPreSearch reports whether the request can be answered
// from the preSearch result alone, bypassing the actual search completely.
//
// That is the case only when both conditions hold:
//   - req.Query is a match_none query; any other query still has to be
//     executed in phase 2.
//   - req.Facets is nil; facets have to be computed in phase 2, because the
//     merge after phase 1 removes documents and facet results computed during
//     phase 1 would therefore be incorrect.
func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool {
	return isMatchNoneQuery(req.Query) && req.Facets == nil
}
// constructKnnPreSearchData stores, for every index in indexes, the KNN hits
// of the preSearch result that belong to that index in mergedOut under
// search.KnnPreSearchDataKey, and returns mergedOut.
//
// mergedOut must already contain a non-nil inner map for each index name.
// If the hits cannot be validated and distributed, nil and the error are
// returned.
func constructKnnPreSearchData(mergedOut map[string]map[string]interface{}, preSearchResult *SearchResult,
	indexes []Index) (map[string]map[string]interface{}, error) {
	hitsByIndex, err := validateAndDistributeKNNHits([]*search.DocumentMatch(preSearchResult.Hits), indexes)
	if err != nil {
		return nil, err
	}
	for _, child := range indexes {
		name := child.Name()
		mergedOut[name][search.KnnPreSearchDataKey] = hitsByIndex[name]
	}
	return mergedOut, nil
}
// addKnnToDummyRequest copies the KNN related settings of realReq onto
// dummyReq: the KNN requests, Explain, Fields and Sort. The KNN operator of
// dummyReq is always set to OR, regardless of the operator of realReq.
func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
	dummyReq.KNNOperator = knnOperatorOr

	dummyReq.KNN = realReq.KNN
	dummyReq.Sort = realReq.Sort
	dummyReq.Fields = realReq.Fields
	dummyReq.Explain = realReq.Explain
}
// newKnnPreSearchResultProcessor builds the processor that merges the KNN
// preSearch results of several indexes.
//
// A KNN collector store is created from the K value of every KNN request. The
// processor's addFn feeds the hits of one index into that store, and its
// finalizeFn replaces the hits of a search result with the store's final,
// merged hits.
func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor {
	ks := make([]int64, 0, len(req.KNN))
	for _, knnReq := range req.KNN {
		ks = append(ks, knnReq.K)
	}
	store := collector.GetNewKNNCollectorStore(ks)

	collect := func(sr *SearchResult, indexName string) {
		for _, hit := range sr.Hits {
			// push the index name onto the hit, so that the final search
			// result offers a valid path along the alias tree down to the
			// index the hit came from
			hit.IndexNames = append(hit.IndexNames, indexName)
			store.AddDocument(hit)
		}
	}

	finish := func(sr *SearchResult) {
		// no document fixup function is passed (nil): the documents were
		// already fixed up during the first phase, so the returned error is
		// always nil and is deliberately discarded.
		sr.Hits, _ = store.Final(nil)
	}

	return &knnPreSearchResultProcessor{
		addFn:      collect,
		finalizeFn: finish,
	}
}
// prepareKnnRequest neutralizes the boosts of all KNN requests for fusion
// rescoring. The boost of KNN request i is remembered in r.origBoosts[i+1];
// a request without a boost is remembered as 1.0 and left untouched, every
// other request gets its boost replaced by 1.0.
func (r *rescorer) prepareKnnRequest() {
	for i := range r.req.KNN {
		slot := i + 1
		current := r.req.KNN[i].Boost
		if current == nil {
			r.origBoosts[slot] = 1.0
			continue
		}
		r.origBoosts[slot] = current.Value()
		unit := query.Boost(1.0)
		r.req.KNN[i].Boost = &unit
	}
}
// restoreKnnRequest puts the remembered boosts back after fusion rescoring:
// KNN request i receives a freshly allocated boost holding the value stored
// in r.origBoosts[i+1]. The boost pointer is always non-nil afterwards.
func (r *rescorer) restoreKnnRequest() {
	for i := range r.req.KNN {
		restored := new(query.Boost)
		*restored = query.Boost(r.origBoosts[i+1])
		r.req.KNN[i].Boost = restored
	}
}
================================================
FILE: search_knn_test.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build vectors
// +build vectors
package bleve
import (
"archive/zip"
"bytes"
"encoding/base64"
"encoding/binary"
"encoding/json"
"fmt"
"math"
"math/rand"
"reflect"
"sort"
"strconv"
"sync"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
)
const testInputCompressedFile = "test/knn/knn_dataset_queries.zip"
const testDatasetFileName = "knn_dataset.json"
const testQueryFileName = "knn_queries.json"
const testDatasetDims = 384
var knnOperators []knnOperator = []knnOperator{knnOperatorAnd, knnOperatorOr}
// TestSimilaritySearchPartitionedIndex runs the hybrid (query + KNN) search
// requests of the test dataset against an index alias and checks that the
// results do not depend on how the documents are partitioned behind the alias.
//
// It has three parts:
//  1. for every test case and KNN operator, the result of a single partition
//     is compared with the result of several partitions, and the latter with
//     the result of a multi-level layout;
//  2. for every test case and KNN operator, the combinations of sort and
//     facets are compared with each other, including a match_none query with
//     and without facets;
//  3. From/Size pagination is checked on a multi-level index, both for the
//     two phase search and for the match_none variant without facets.
func TestSimilaritySearchPartitionedIndex(t *testing.T) {
	dataset, searchRequests, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	documents := makeDatasetIntoDocuments(dataset)
	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Analyzer = en.AnalyzerName
	// one index mapping per similarity metric, all sharing the content mapping
	vecFieldMappingL2 := mapping.NewVectorFieldMapping()
	vecFieldMappingL2.Dims = testDatasetDims
	vecFieldMappingL2.Similarity = index.EuclideanDistance
	indexMappingL2Norm := NewIndexMapping()
	indexMappingL2Norm.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)
	indexMappingL2Norm.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMappingL2)
	vecFieldMappingDot := mapping.NewVectorFieldMapping()
	vecFieldMappingDot.Dims = testDatasetDims
	vecFieldMappingDot.Similarity = index.InnerProduct
	indexMappingDotProduct := NewIndexMapping()
	indexMappingDotProduct.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)
	indexMappingDotProduct.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMappingDot)
	vecFieldMappingCosine := mapping.NewVectorFieldMapping()
	vecFieldMappingCosine.Dims = testDatasetDims
	vecFieldMappingCosine.Similarity = index.CosineSimilarity
	indexMappingCosine := NewIndexMapping()
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMappingCosine)
	// testCase selects one request of the query file (queryIndex) and says
	// over how many partitions the documents are spread for that request.
	type testCase struct {
		testType           string
		queryIndex         int
		numIndexPartitions int
		mapping            mapping.IndexMapping
	}
	testCases := []testCase{
		// l2 norm similarity
		{
			testType:           "multi_partition:match_none:oneKNNreq:k=3",
			queryIndex:         0,
			numIndexPartitions: 4,
			mapping:            indexMappingL2Norm,
		},
		{
			testType:           "multi_partition:match_none:oneKNNreq:k=2",
			queryIndex:         0,
			numIndexPartitions: 10,
			mapping:            indexMappingL2Norm,
		},
		{
			testType:           "multi_partition:match:oneKNNreq:k=2",
			queryIndex:         1,
			numIndexPartitions: 5,
			mapping:            indexMappingL2Norm,
		},
		{
			testType:           "multi_partition:disjunction:twoKNNreq:k=2,2",
			queryIndex:         2,
			numIndexPartitions: 4,
			mapping:            indexMappingL2Norm,
		},
		// dot product similarity
		{
			testType:           "multi_partition:match_none:oneKNNreq:k=3",
			queryIndex:         0,
			numIndexPartitions: 4,
			mapping:            indexMappingDotProduct,
		},
		{
			testType:           "multi_partition:match_none:oneKNNreq:k=2",
			queryIndex:         0,
			numIndexPartitions: 10,
			mapping:            indexMappingDotProduct,
		},
		{
			testType:           "multi_partition:match:oneKNNreq:k=2",
			queryIndex:         1,
			numIndexPartitions: 5,
			mapping:            indexMappingDotProduct,
		},
		{
			testType:           "multi_partition:disjunction:twoKNNreq:k=2,2",
			queryIndex:         2,
			numIndexPartitions: 4,
			mapping:            indexMappingDotProduct,
		},
		// cosine similarity
		{
			testType:           "multi_partition:match_none:oneKNNreq:k=3",
			queryIndex:         0,
			numIndexPartitions: 7,
			mapping:            indexMappingCosine,
		},
		{
			testType:           "multi_partition:match_none:oneKNNreq:k=2",
			queryIndex:         0,
			numIndexPartitions: 5,
			mapping:            indexMappingCosine,
		},
		{
			testType:           "multi_partition:match:oneKNNreq:k=2",
			queryIndex:         1,
			numIndexPartitions: 3,
			mapping:            indexMappingCosine,
		},
		{
			testType:           "multi_partition:disjunction:twoKNNreq:k=2,2",
			queryIndex:         2,
			numIndexPartitions: 9,
			mapping:            indexMappingCosine,
		},
	}
	// note: from here on the local variable index shadows the imported
	// index package.
	index := NewIndexAlias()
	var reqSort = search.SortOrder{&search.SortScore{Desc: true}, &search.SortDocID{Desc: true}, &search.SortField{Desc: false, Field: "content"}}
	// Part 1: partition count and alias layout must not change the result.
	for testCaseNum, testCase := range testCases {
		originalRequest := searchRequests[testCase.queryIndex]
		for _, operator := range knnOperators {
			// control: all documents in a single partition
			index.indexes = make([]Index, 0)
			query := copySearchRequest(originalRequest, nil)
			query.AddKNNOperator(operator)
			query.Sort = reqSort.Copy()
			query.Explain = true
			nameToIndex := createPartitionedIndex(documents, index, 1, testCase.mapping, t, false)
			controlResult, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(controlResult.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected control result hits to have valid `Index`", testCaseNum)
			}
			cleanUp(t, nameToIndex)
			// experiment: the same documents over numIndexPartitions partitions
			index.indexes = make([]Index, 0)
			query = copySearchRequest(originalRequest, nil)
			query.AddKNNOperator(operator)
			query.Sort = reqSort.Copy()
			query.Explain = true
			nameToIndex = createPartitionedIndex(documents, index, testCase.numIndexPartitions, testCase.mapping, t, false)
			experimentalResult, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(experimentalResult.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected experimental Result hits to have valid `Index`", testCaseNum)
			}
			verifyResult(t, controlResult, experimentalResult, testCaseNum, true)
			cleanUp(t, nameToIndex)
			// same partition count again, this time with multiLevel enabled
			index.indexes = make([]Index, 0)
			query = copySearchRequest(originalRequest, nil)
			query.AddKNNOperator(operator)
			query.Sort = reqSort.Copy()
			query.Explain = true
			nameToIndex = createPartitionedIndex(documents, index, testCase.numIndexPartitions, testCase.mapping, t, true)
			multiLevelIndexResult, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(multiLevelIndexResult.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected experimental Result hits to have valid `Index`", testCaseNum)
			}
			verifyResult(t, multiLevelIndexResult, experimentalResult, testCaseNum, false)
			cleanUp(t, nameToIndex)
		}
	}
	// Part 2: sort and facet combinations on one partitioned index per test case.
	var facets = map[string]*FacetRequest{
		"content": {
			Field: "content",
			Size:  10,
		},
	}
	index = NewIndexAlias()
	for testCaseNum, testCase := range testCases {
		index.indexes = make([]Index, 0)
		nameToIndex := createPartitionedIndex(documents, index, testCase.numIndexPartitions, testCase.mapping, t, false)
		originalRequest := searchRequests[testCase.queryIndex]
		for _, operator := range knnOperators {
			from, size := originalRequest.From, originalRequest.Size
			query := copySearchRequest(originalRequest, nil)
			query.AddKNNOperator(operator)
			query.Explain = true
			query.From = from
			query.Size = size
			// Three types of queries to run wrt sort and facet fields that require fields.
			// 1. Sort And Facet are there
			// 2. Sort is there, Facet is not there
			// 3. Sort is not there, Facet is there
			// The case where both sort and facet are not there is already covered in the previous tests.
			// 1. Sort And Facet are there
			query.Facets = facets
			query.Sort = reqSort.Copy()
			res1, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(res1.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected experimental Result hits to have valid `Index`", testCaseNum)
			}
			facetRes1 := res1.Facets
			facetRes1Str, err := json.Marshal(facetRes1)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			// 2. Sort is there, Facet is not there
			query.Facets = nil
			query.Sort = reqSort.Copy()
			res2, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(res2.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected experimental Result hits to have valid `Index`", testCaseNum)
			}
			// 3. Sort is not there, Facet is there
			query.Facets = facets
			query.Sort = nil
			res3, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(res3.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected experimental Result hits to have valid `Index`", testCaseNum)
			}
			facetRes3 := res3.Facets
			facetRes3Str, err := json.Marshal(facetRes3)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			// Verify the facet results
			// (compared via their JSON encoding)
			if string(facetRes1Str) != string(facetRes3Str) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected facet results to be equal", testCaseNum)
			}
			// Verify the results
			verifyResult(t, res1, res2, testCaseNum, false)
			verifyResult(t, res2, res3, testCaseNum, true)
			// Test early exit fail case -> matchNone + facetRequest
			query.Query = NewMatchNoneQuery()
			query.Sort = reqSort.Copy()
			// control case
			query.Facets = nil
			res4Ctrl, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(res4Ctrl.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected control Result hits to have valid `Index`", testCaseNum)
			}
			// experimental case
			query.Facets = facets
			res4Exp, err := index.Search(query)
			if err != nil {
				cleanUp(t, nameToIndex)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(res4Exp.Hits, nameToIndex) {
				cleanUp(t, nameToIndex)
				t.Fatalf("test case #%d failed: expected experimental Result hits to have valid `Index`", testCaseNum)
			}
			if !(operator == knnOperatorAnd && res4Ctrl.Total == 0 && res4Exp.Total == 0) {
				// catch case where no hits are returned
				// due to matchNone query with a KNN request with operator AND
				// where no hits are part of the intersection in multi knn request
				verifyResult(t, res4Ctrl, res4Exp, testCaseNum, false)
			}
		}
		cleanUp(t, nameToIndex)
	}
	// Part 3:
	// Test Pagination with multi partitioned index
	index = NewIndexAlias()
	index.indexes = make([]Index, 0)
	nameToIndex := createPartitionedIndex(documents, index, 8, indexMappingL2Norm, t, true)
	// Test From + Size pagination for Hybrid Search (2-Phase)
	query := copySearchRequest(searchRequests[4], nil)
	query.Sort = reqSort.Copy()
	query.Facets = facets
	query.Explain = true
	testFromSizePagination(t, query, index, nameToIndex)
	// Test From + Size pagination for Early Exit Hybrid Search (1-Phase)
	query = copySearchRequest(searchRequests[4], nil)
	query.Query = NewMatchNoneQuery()
	query.Sort = reqSort.Copy()
	query.Facets = nil
	query.Explain = true
	testFromSizePagination(t, query, index, nameToIndex)
	cleanUp(t, nameToIndex)
}
// testFromSizePagination checks that paging through the results of query in
// pages of 5 hits yields the same document IDs, in the same order, as one
// search for the first 30 hits.
//
// query.From and query.Size are overwritten. On any failure the indexes in
// nameToIndex are cleaned up before the test is aborted.
func testFromSizePagination(t *testing.T, query *SearchRequest, index Index, nameToIndex map[string]Index) {
	// baseline: everything in a single page
	query.From, query.Size = 0, 30
	baseline, err := index.Search(query)
	if err != nil {
		cleanUp(t, nameToIndex)
		t.Fatal(err)
	}
	expectedIDs := make([]string, 0, len(baseline.Hits))
	for _, hit := range baseline.Hits {
		expectedIDs = append(expectedIDs, hit.ID)
	}

	// experiment: the same range, one small page at a time
	pageSize := 5
	for _, from := range []int{0, 5, 10, 15, 20, 25} {
		query.From, query.Size = from, pageSize
		page, err := index.Search(query)
		if err != nil {
			cleanUp(t, nameToIndex)
			t.Fatal(err)
		}

		// a page starting beyond the baseline has to be empty
		if from >= len(expectedIDs) {
			if len(page.Hits) != 0 {
				cleanUp(t, nameToIndex)
				t.Fatalf("expected 0 hits, got %d", len(page.Hits))
			}
			continue
		}

		// the last page may be shorter than pageSize
		got := len(page.Hits)
		want := min(len(expectedIDs)-from, pageSize)
		if got != want {
			cleanUp(t, nameToIndex)
			t.Fatalf("expected %d hits, got %d", want, got)
		}
		for i, hit := range page.Hits {
			expectedID := expectedIDs[from+i]
			if hit.ID != expectedID {
				cleanUp(t, nameToIndex)
				t.Fatalf("expected %s at index %d, got %s", expectedID, i, hit.ID)
			}
		}
	}
}
// TestVectorBase64Index checks that searching a base64 encoded vector field
// ("vectorEncoded") returns the same hit IDs, in the same order, as searching
// the plain vector field ("vector") holding the same data.
//
// Every document is indexed with both fields into two indexes, one using
// euclidean distance and one using inner product. Each search request of the
// test dataset is then run against both fields with every KNN operator.
// Hit IDs are only compared when both results have the same number of hits.
func TestVectorBase64Index(t *testing.T) {
	dataset, searchRequests, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	documents := makeDatasetIntoDocuments(dataset)
	// second, independent copy of the requests; it is pointed at the
	// base64 field below
	_, searchRequestsCopy, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	// add the base64 form of each vector: the float32 values written in
	// little endian order, then base64 (standard encoding) encoded
	for _, doc := range documents {
		vec, ok := doc["vector"].([]float32)
		if !ok {
			t.Fatal("Typecasting vector to float array failed")
		}
		buf := new(bytes.Buffer)
		for _, v := range vec {
			err := binary.Write(buf, binary.LittleEndian, v)
			if err != nil {
				t.Fatal(err)
			}
		}
		doc["vectorEncoded"] = base64.StdEncoding.EncodeToString(buf.Bytes())
	}
	for _, sr := range searchRequestsCopy {
		for _, kr := range sr.KNN {
			kr.Field = "vectorEncoded"
		}
	}
	contentFM := NewTextFieldMapping()
	contentFM.Analyzer = en.AnalyzerName
	// plain (vecFM*) and base64 (vecBFM*) vector mappings per metric
	vecFML2 := mapping.NewVectorFieldMapping()
	vecFML2.Dims = testDatasetDims
	vecFML2.Similarity = index.EuclideanDistance
	vecBFML2 := mapping.NewVectorBase64FieldMapping()
	vecBFML2.Dims = testDatasetDims
	vecBFML2.Similarity = index.EuclideanDistance
	vecFMDot := mapping.NewVectorFieldMapping()
	vecFMDot.Dims = testDatasetDims
	vecFMDot.Similarity = index.InnerProduct
	vecBFMDot := mapping.NewVectorBase64FieldMapping()
	vecBFMDot.Dims = testDatasetDims
	vecBFMDot.Similarity = index.InnerProduct
	indexMappingL2 := NewIndexMapping()
	indexMappingL2.DefaultMapping.AddFieldMappingsAt("content", contentFM)
	indexMappingL2.DefaultMapping.AddFieldMappingsAt("vector", vecFML2)
	indexMappingL2.DefaultMapping.AddFieldMappingsAt("vectorEncoded", vecBFML2)
	indexMappingDot := NewIndexMapping()
	indexMappingDot.DefaultMapping.AddFieldMappingsAt("content", contentFM)
	indexMappingDot.DefaultMapping.AddFieldMappingsAt("vector", vecFMDot)
	indexMappingDot.DefaultMapping.AddFieldMappingsAt("vectorEncoded", vecBFMDot)
	tmpIndexPathL2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPathL2)
	tmpIndexPathDot := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPathDot)
	indexL2, err := New(tmpIndexPathL2, indexMappingL2)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexL2.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	indexDot, err := New(tmpIndexPathDot, indexMappingDot)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexDot.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	// index every document into both indexes, one batch per index
	batchL2 := indexL2.NewBatch()
	batchDot := indexDot.NewBatch()
	for _, doc := range documents {
		err = batchL2.Index(doc["id"].(string), doc)
		if err != nil {
			t.Fatal(err)
		}
		err = batchDot.Index(doc["id"].(string), doc)
		if err != nil {
			t.Fatal(err)
		}
	}
	err = indexL2.Batch(batchL2)
	if err != nil {
		t.Fatal(err)
	}
	err = indexDot.Batch(batchDot)
	if err != nil {
		t.Fatal(err)
	}
	for i := range searchRequests {
		for _, operator := range knnOperators {
			// controlQuery targets "vector", testQuery targets "vectorEncoded"
			controlQuery := searchRequests[i]
			testQuery := searchRequestsCopy[i]
			controlQuery.AddKNNOperator(operator)
			testQuery.AddKNNOperator(operator)
			// euclidean distance index
			controlResultL2, err := indexL2.Search(controlQuery)
			if err != nil {
				t.Fatal(err)
			}
			testResultL2, err := indexL2.Search(testQuery)
			if err != nil {
				t.Fatal(err)
			}
			if controlResultL2 != nil && testResultL2 != nil {
				// IDs are compared position by position, but only when
				// the hit counts agree
				if len(controlResultL2.Hits) == len(testResultL2.Hits) {
					for j := range controlResultL2.Hits {
						if controlResultL2.Hits[j].ID != testResultL2.Hits[j].ID {
							t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, controlResultL2.Hits[j].ID, testResultL2.Hits[j].ID)
						}
					}
				}
			} else if (controlResultL2 == nil && testResultL2 != nil) ||
				(controlResultL2 != nil && testResultL2 == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, controlResultL2, testResultL2)
			}
			// inner product index
			controlResultDot, err := indexDot.Search(controlQuery)
			if err != nil {
				t.Fatal(err)
			}
			testResultDot, err := indexDot.Search(testQuery)
			if err != nil {
				t.Fatal(err)
			}
			if controlResultDot != nil && testResultDot != nil {
				if len(controlResultDot.Hits) == len(testResultDot.Hits) {
					for j := range controlResultDot.Hits {
						if controlResultDot.Hits[j].ID != testResultDot.Hits[j].ID {
							t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, controlResultDot.Hits[j].ID, testResultDot.Hits[j].ID)
						}
					}
				}
			} else if (controlResultDot == nil && testResultDot != nil) ||
				(controlResultDot != nil && testResultDot == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, controlResultDot, testResultDot)
			}
		}
	}
}
// TestVectorBivfIndexes runs testVectorBivfIndex once for each of the two
// BIVF vector index optimizations, the one backed by SQ8 and the one backed
// by flat storage.
//
// For each optimization, testVectorBivfIndex compares the results of the
// base64 vector field with those of the plain vector field for the L2,
// dot product and cosine similarities, and compares the results of the
// different similarities with each other.
func TestVectorBivfIndexes(t *testing.T) {
	for _, optimization := range []string{index.IndexBIVFWithBackingSQ8, index.IndexBIVFWithBackingFlat} {
		testVectorBivfIndex(t, optimization)
	}
}
// testVectorBivfIndex indexes the test dataset into three indexes (euclidean
// distance, inner product, cosine similarity) whose vector fields use the
// given optimization, and runs every search request of the dataset with every
// KNN operator against them.
//
// Two kinds of checks are made, each only when the compared results have the
// same number of hits:
//   - per index, the base64 field ("vectorEncoded") must return the same hit
//     IDs in the same order as the plain field ("vector");
//   - the cosine results are compared with the L2 and with the dot product
//     results; a differing ID at a position is only a failure when the scores
//     at that position differ as well.
func testVectorBivfIndex(t *testing.T, optimization string) {
	dataset, searchRequests, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	documents := makeDatasetIntoDocuments(dataset)
	// second, independent copy of the requests; it is pointed at the
	// base64 field below
	_, searchRequestsCopy, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	// add the base64 form of each vector: the float32 values written in
	// little endian order, then base64 (standard encoding) encoded
	for _, doc := range documents {
		vec, ok := doc["vector"].([]float32)
		if !ok {
			t.Fatal("Typecasting vector to float array failed")
		}
		buf := new(bytes.Buffer)
		for _, v := range vec {
			err := binary.Write(buf, binary.LittleEndian, v)
			if err != nil {
				t.Fatal(err)
			}
		}
		doc["vectorEncoded"] = base64.StdEncoding.EncodeToString(buf.Bytes())
	}
	for _, sr := range searchRequestsCopy {
		for _, kr := range sr.KNN {
			kr.Field = "vectorEncoded"
		}
	}
	contentFM := NewTextFieldMapping()
	contentFM.Analyzer = en.AnalyzerName
	// plain (vecFM*) and base64 (vecBFM*) vector mappings per metric
	vecFML2 := mapping.NewVectorFieldMapping()
	vecFML2.Dims = testDatasetDims
	vecFML2.Similarity = index.EuclideanDistance
	vecFML2.VectorIndexOptimizedFor = optimization
	vecBFML2 := mapping.NewVectorBase64FieldMapping()
	vecBFML2.Dims = testDatasetDims
	vecBFML2.Similarity = index.EuclideanDistance
	vecBFML2.VectorIndexOptimizedFor = optimization
	vecFMDot := mapping.NewVectorFieldMapping()
	vecFMDot.Dims = testDatasetDims
	vecFMDot.Similarity = index.InnerProduct
	vecFMDot.VectorIndexOptimizedFor = optimization
	vecBFMDot := mapping.NewVectorBase64FieldMapping()
	vecBFMDot.Dims = testDatasetDims
	vecBFMDot.Similarity = index.InnerProduct
	vecBFMDot.VectorIndexOptimizedFor = optimization
	// NOTE(review): unlike every other mapping in this function, vecFMCosine
	// does not set VectorIndexOptimizedFor -- confirm whether this is
	// intentional or an omission.
	vecFMCosine := mapping.NewVectorFieldMapping()
	vecFMCosine.Dims = testDatasetDims
	vecFMCosine.Similarity = index.CosineSimilarity
	vecBFMCosine := mapping.NewVectorBase64FieldMapping()
	vecBFMCosine.Dims = testDatasetDims
	vecBFMCosine.Similarity = index.CosineSimilarity
	vecBFMCosine.VectorIndexOptimizedFor = optimization
	indexMappingL2 := NewIndexMapping()
	indexMappingL2.DefaultMapping.AddFieldMappingsAt("content", contentFM)
	indexMappingL2.DefaultMapping.AddFieldMappingsAt("vector", vecFML2)
	indexMappingL2.DefaultMapping.AddFieldMappingsAt("vectorEncoded", vecBFML2)
	indexMappingDot := NewIndexMapping()
	indexMappingDot.DefaultMapping.AddFieldMappingsAt("content", contentFM)
	indexMappingDot.DefaultMapping.AddFieldMappingsAt("vector", vecFMDot)
	indexMappingDot.DefaultMapping.AddFieldMappingsAt("vectorEncoded", vecBFMDot)
	indexMappingCosine := NewIndexMapping()
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("content", contentFM)
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("vector", vecFMCosine)
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("vectorEncoded", vecBFMCosine)
	tmpIndexPathL2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPathL2)
	tmpIndexPathDot := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPathDot)
	tmpIndexPathCosine := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPathCosine)
	indexL2, err := New(tmpIndexPathL2, indexMappingL2)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexL2.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	indexDot, err := New(tmpIndexPathDot, indexMappingDot)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexDot.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	indexCosine, err := New(tmpIndexPathCosine, indexMappingCosine)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := indexCosine.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()
	// index every document into all three indexes, one batch per index
	batchL2 := indexL2.NewBatch()
	batchDot := indexDot.NewBatch()
	batchCosine := indexCosine.NewBatch()
	for _, doc := range documents {
		err = batchL2.Index(doc["id"].(string), doc)
		if err != nil {
			t.Fatal(err)
		}
		err = batchDot.Index(doc["id"].(string), doc)
		if err != nil {
			t.Fatal(err)
		}
		err = batchCosine.Index(doc["id"].(string), doc)
		if err != nil {
			t.Fatal(err)
		}
	}
	err = indexL2.Batch(batchL2)
	if err != nil {
		t.Fatal(err)
	}
	err = indexDot.Batch(batchDot)
	if err != nil {
		t.Fatal(err)
	}
	err = indexCosine.Batch(batchCosine)
	if err != nil {
		t.Fatal(err)
	}
	for i := range searchRequests {
		for _, operator := range knnOperators {
			// normQuery targets "vector", base64Query targets "vectorEncoded"
			normQuery := searchRequests[i]
			base64Query := searchRequestsCopy[i]
			normQuery.AddKNNOperator(operator)
			base64Query.AddKNNOperator(operator)
			// euclidean distance index: plain vs base64 field
			normResultL2, err := indexL2.Search(normQuery)
			if err != nil {
				t.Fatal(err)
			}
			base64ResultL2, err := indexL2.Search(base64Query)
			if err != nil {
				t.Fatal(err)
			}
			if normResultL2 != nil && base64ResultL2 != nil {
				if len(normResultL2.Hits) == len(base64ResultL2.Hits) {
					for j := range normResultL2.Hits {
						if normResultL2.Hits[j].ID != base64ResultL2.Hits[j].ID {
							t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, normResultL2.Hits[j].ID, base64ResultL2.Hits[j].ID)
						}
					}
				}
			} else if (normResultL2 == nil && base64ResultL2 != nil) ||
				(normResultL2 != nil && base64ResultL2 == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, normResultL2, base64ResultL2)
			}
			// inner product index: plain vs base64 field
			normResultDot, err := indexDot.Search(normQuery)
			if err != nil {
				t.Fatal(err)
			}
			base64ResultDot, err := indexDot.Search(base64Query)
			if err != nil {
				t.Fatal(err)
			}
			if normResultDot != nil && base64ResultDot != nil {
				if len(normResultDot.Hits) == len(base64ResultDot.Hits) {
					for j := range normResultDot.Hits {
						if normResultDot.Hits[j].ID != base64ResultDot.Hits[j].ID {
							t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, normResultDot.Hits[j].ID, base64ResultDot.Hits[j].ID)
						}
					}
				}
			} else if (normResultDot == nil && base64ResultDot != nil) ||
				(normResultDot != nil && base64ResultDot == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, normResultDot, base64ResultDot)
			}
			// cosine similarity index: plain vs base64 field
			normResultCosine, err := indexCosine.Search(normQuery)
			if err != nil {
				t.Fatal(err)
			}
			base64ResultCosine, err := indexCosine.Search(base64Query)
			if err != nil {
				t.Fatal(err)
			}
			if normResultCosine != nil && base64ResultCosine != nil {
				if len(normResultCosine.Hits) == len(base64ResultCosine.Hits) {
					for j := range normResultCosine.Hits {
						if normResultCosine.Hits[j].ID != base64ResultCosine.Hits[j].ID {
							t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, normResultCosine.Hits[j].ID, base64ResultCosine.Hits[j].ID)
						}
					}
				}
			} else if (normResultCosine == nil && base64ResultCosine != nil) ||
				(normResultCosine != nil && base64ResultCosine == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, normResultCosine, base64ResultCosine)
			}
			// cosine vs L2 on the plain field: a differing ID is tolerated
			// as long as the scores at that position are equal
			if normResultCosine != nil && normResultL2 != nil {
				if len(normResultCosine.Hits) == len(normResultL2.Hits) {
					for j := range normResultCosine.Hits {
						if normResultCosine.Hits[j].ID != normResultL2.Hits[j].ID {
							if normResultCosine.Hits[j].Score != normResultL2.Hits[j].Score {
								t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, normResultCosine.Hits[j].ID, normResultL2.Hits[j].ID)
							}
						}
					}
				}
			} else if (normResultCosine == nil && normResultL2 != nil) ||
				(normResultCosine != nil && normResultL2 == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, normResultCosine, normResultL2)
			}
			// cosine vs dot product on the plain field, same tolerance
			if normResultCosine != nil && normResultDot != nil {
				if len(normResultCosine.Hits) == len(normResultDot.Hits) {
					for j := range normResultCosine.Hits {
						if normResultCosine.Hits[j].ID != normResultDot.Hits[j].ID {
							if normResultCosine.Hits[j].Score != normResultDot.Hits[j].Score {
								t.Fatalf("testcase %d failed: expected hit id %s, got hit id %s", i, normResultCosine.Hits[j].ID, normResultDot.Hits[j].ID)
							}
						}
					}
				}
			} else if (normResultCosine == nil && normResultDot != nil) ||
				(normResultCosine != nil && normResultDot == nil) {
				t.Fatalf("testcase %d failed: expected result %s, got result %s", i, normResultCosine, normResultDot)
			}
		}
	}
}
// testDocument is one record of the test dataset as decoded from JSON by
// readDatasetAndQueries.
type testDocument struct {
	// ID is read from the "id" key; callers use it as the document ID.
	ID string `json:"id"`
	// Content is read from the "content" key.
	Content string `json:"content"`
	// Vector is read from the "vector" key.
	Vector []float32 `json:"vector"`
}
// readDatasetAndQueries loads the test fixtures from the zip archive at
// fileName.
//
// The entry named testDatasetFileName is JSON-decoded into the returned
// dataset and the entry named testQueryFileName into the returned search
// requests. Entries with any other name are skipped without being opened.
// If an entry is absent, the corresponding result stays nil. On any error
// both results are nil and the error is returned as is.
func readDatasetAndQueries(fileName string) ([]testDocument, []*SearchRequest, error) {
	// Open the zip archive for reading
	r, err := zip.OpenReader(fileName)
	if err != nil {
		return nil, nil, err
	}
	defer r.Close()

	// decodeEntry opens one archive entry, decodes its JSON payload into dst
	// and closes the entry before returning. Doing this in a helper releases
	// each entry handle per iteration; a defer placed directly in the loop
	// below would keep every handle open until this function returns.
	decodeEntry := func(f *zip.File, dst interface{}) error {
		rc, err := f.Open()
		if err != nil {
			return err
		}
		defer rc.Close()
		return json.NewDecoder(rc).Decode(dst)
	}

	var dataset []testDocument
	var queries []*SearchRequest
	for _, f := range r.File {
		var dst interface{}
		switch f.Name {
		case testDatasetFileName:
			dst = &dataset
		case testQueryFileName:
			dst = &queries
		default:
			// not a fixture this helper knows about
			continue
		}
		if err := decodeEntry(f, dst); err != nil {
			return nil, nil, err
		}
	}
	return dataset, queries, nil
}
// makeDatasetIntoDocuments converts every dataset record into a generic
// document map with the keys "id", "content" and "vector". The result has the
// same length and order as dataset.
func makeDatasetIntoDocuments(dataset []testDocument) []map[string]interface{} {
	documents := make([]map[string]interface{}, 0, len(dataset))
	for _, record := range dataset {
		documents = append(documents, map[string]interface{}{
			"id":      record.ID,
			"content": record.Content,
			"vector":  record.Vector,
		})
	}
	return documents
}
// cleanUp closes every index held in nameToIndex and then hands the matching
// map key to cleanupTmpIndexPath. The test is failed immediately if an index
// cannot be closed.
// NOTE(review): the keys are index names; they are presumably identical to the
// temporary index paths - confirm against Index.Name().
func cleanUp(t *testing.T, nameToIndex map[string]Index) {
	for indexPath, idx := range nameToIndex {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
		cleanupTmpIndexPath(t, indexPath)
	}
}
// createChildIndex creates a new index at a fresh temporary path, registers it
// in nameToIndex under its name and indexes docs into it with a single batch.
// Each document is stored under the string found at its "id" key.
//
// If building or applying the batch fails, every index registered in
// nameToIndex so far is cleaned up before the test is failed.
func createChildIndex(docs []map[string]interface{}, mapping mapping.IndexMapping, t *testing.T, nameToIndex map[string]Index) Index {
	indexPath := createTmpIndexPath(t)
	child, err := New(indexPath, mapping)
	if err != nil {
		t.Fatal(err)
	}
	nameToIndex[child.Name()] = child

	pending := child.NewBatch()
	for i := range docs {
		if err := pending.Index(docs[i]["id"].(string), docs[i]); err != nil {
			cleanUp(t, nameToIndex)
			t.Fatal(err)
		}
	}
	if err := child.Batch(pending); err != nil {
		cleanUp(t, nameToIndex)
		t.Fatal(err)
	}
	return child
}
// createPartitionedIndex splits documents into numPartitions consecutive
// chunks, builds one child index per chunk and attaches the children to the
// alias index.
//
// The chunks are as even as possible: the first len(documents)%numPartitions
// chunks hold one document more than the rest. With multiLevel false every
// child is added directly to index. With multiLevel true the children are
// grouped in consecutive pairs under intermediate aliases named "alias0",
// "alias1", ... (the last alias holds a single child when numPartitions is
// odd) and those aliases are added to index.
//
// The returned map holds every created child index keyed by its name.
func createPartitionedIndex(documents []map[string]interface{}, index *indexAliasImpl, numPartitions int,
	mapping mapping.IndexMapping, t *testing.T, multiLevel bool) map[string]Index {
	baseSize := len(documents) / numPartitions
	remainder := len(documents) % numPartitions

	chunks := make([][]map[string]interface{}, numPartitions)
	start := 0
	for p := range chunks {
		size := baseSize
		if p < remainder {
			// hand out the leftover documents one per leading partition
			size++
		}
		chunks[p] = make([]map[string]interface{}, size)
		copy(chunks[p], documents[start:start+size])
		start += size
	}

	rv := make(map[string]Index)
	if !multiLevel {
		// all indexes are at the same level
		for _, chunk := range chunks {
			index.Add(createChildIndex(chunk, mapping, t, rv))
		}
		return rv
	}

	// alias tree
	leaves := make([]Index, 0, numPartitions)
	for _, chunk := range chunks {
		leaves = append(leaves, createChildIndex(chunk, mapping, t, rv))
	}
	numAlias := int(math.Ceil(float64(numPartitions) / 2.0))
	aliases := make([]IndexAlias, 0, numAlias)
	for a := 0; a < numAlias; a++ {
		alias := NewIndexAlias()
		alias.SetName(fmt.Sprintf("alias%d", a))
		first, end := a*2, a*2+2
		if end > numPartitions {
			end = numPartitions
		}
		for _, leaf := range leaves[first:end] {
			alias.Add(leaf)
		}
		aliases = append(aliases, alias)
	}
	for _, alias := range aliases {
		index.Add(alias)
	}
	return rv
}
// createMultipleSegmentsIndex indexes documents into index using numSegments
// separate batches. The documents are spread over the batches as evenly as
// possible, with the leading batches taking one extra document when the count
// does not divide evenly. Each document is stored under the string found at
// its "id" key.
//
// All batches are prepared first and then applied concurrently, one goroutine
// per batch. An error while preparing a batch is returned immediately; if
// applying batches fails, the first error that was recorded is returned.
func createMultipleSegmentsIndex(documents []map[string]interface{}, index Index, numSegments int) error {
	// create multiple batches to simulate more than one segment
	batches := make([]*Batch, numSegments)
	baseSize := len(documents) / numSegments
	remainder := len(documents) % numSegments

	next := 0
	for b := range batches {
		size := baseSize
		if b < remainder {
			size++
		}
		batches[b] = index.NewBatch()
		for _, doc := range documents[next : next+size] {
			if err := batches[b].Index(doc["id"].(string), doc); err != nil {
				return err
			}
		}
		next += size
	}

	var (
		mu        sync.Mutex
		batchErrs []error
		wg        sync.WaitGroup
	)
	wg.Add(len(batches))
	for _, pending := range batches {
		go func(pending *Batch) {
			defer wg.Done()
			if err := index.Batch(pending); err != nil {
				// batchErrs is shared between the goroutines
				mu.Lock()
				batchErrs = append(batchErrs, err)
				mu.Unlock()
			}
		}(pending)
	}
	wg.Wait()

	if len(batchErrs) == 0 {
		return nil
	}
	return batchErrs[0]
}
// truncateScore cuts score off after its sixth decimal place (truncating
// toward zero). A truncated value within 1e-4 of 1.0 is reported as exactly
// 1.0.
func truncateScore(score float64) float64 {
	const (
		scale     = 1e6
		tolerance = 1e-4
	)
	cut := float64(int(score*scale)) / scale
	if math.Abs(cut-1.0) <= tolerance {
		return 1.0
	}
	return cut
}
// compareExplanation reports whether two explanation trees are equivalent.
// Two nil explanations are equal; a nil and a non-nil one are not. Otherwise
// the truncated values and the number of children must match, and the
// children, after being sorted in place, must be pairwise equivalent.
func compareExplanation(a, b *search.Explanation) bool {
	switch {
	case a == nil && b == nil:
		return true
	case a == nil || b == nil:
		return false
	case truncateScore(a.Value) != truncateScore(b.Value):
		return false
	case len(a.Children) != len(b.Children):
		return false
	}

	// Bring both child lists into the same order before pairing them up
	sortChildren(a.Children)
	sortChildren(b.Children)
	for i, child := range a.Children {
		if !compareExplanation(child, b.Children[i]) {
			return false
		}
	}
	return true
}
// sortChildren orders children in place by ascending Value.
func sortChildren(children []*search.Explanation) {
	byValue := func(i, j int) bool {
		return children[i].Value < children[j].Value
	}
	sort.Slice(children, byValue)
}
// finalHitsOmitKNNMetadata reports whether every hit has both IndexNames and
// ScoreBreakdown unset. Final hits of a hybrid or kNN search are expected to
// carry neither.
func finalHitsOmitKNNMetadata(hits []*search.DocumentMatch) bool {
	for i := range hits {
		if hits[i].IndexNames != nil {
			return false
		}
		if hits[i].ScoreBreakdown != nil {
			return false
		}
	}
	return true
}
// finalHitsHaveValidIndex reports whether every hit names an index it can be
// found in. For each hit, its Index field must be non-empty and must map to a
// non-nil entry in indexes, and that index must return a non-nil document for
// the hit's ID without error.
func finalHitsHaveValidIndex(hits []*search.DocumentMatch, indexes map[string]Index) bool {
	for _, hit := range hits {
		if hit.Index == "" {
			return false
		}
		// A missing key and a nil entry both yield a nil Index here.
		idx := indexes[hit.Index]
		if idx == nil {
			return false
		}
		// err is declared locally; the lookup result must not leak into or
		// depend on any variable outside this function.
		doc, err := idx.Document(hit.ID)
		if err != nil || doc == nil {
			return false
		}
	}
	return true
}
// verifyResult fails the test unless experimentalResult agrees with
// controlResult.
//
// Both results must be non-empty, have the same number of hits and the same
// Total, carry no kNN metadata on their hits, and have a non-zero Took and a
// non-nil Request. With verifyOnlyDocIDs set, only the sets of hit IDs are
// compared. Otherwise the hits are compared position by position on ID,
// truncated score and explanation, and finally the truncated MaxScore values
// must match. testCaseNum is only used in the failure messages.
func verifyResult(t *testing.T, controlResult *SearchResult, experimentalResult *SearchResult, testCaseNum int, verifyOnlyDocIDs bool) {
	want, got := controlResult.Hits, experimentalResult.Hits

	switch {
	case want.Len() == 0 || got.Len() == 0:
		t.Fatalf("test case #%d failed: 0 hits returned", testCaseNum)
	case len(want) != len(got):
		t.Fatalf("test case #%d failed: expected %d results, got %d", testCaseNum, len(want), len(got))
	case controlResult.Total != experimentalResult.Total:
		t.Fatalf("test case #%d failed: expected total hits to be %d, got %d", testCaseNum, controlResult.Total, experimentalResult.Total)
	case !finalHitsOmitKNNMetadata(want) || !finalHitsOmitKNNMetadata(got):
		// KNN Metadata -> Score Breakdown and IndexNames MUST be omitted from the final hits
		t.Fatalf("test case #%d failed: expected no KNN metadata in hits", testCaseNum)
	case controlResult.Took == 0 || experimentalResult.Took == 0:
		t.Fatalf("test case #%d failed: expected non-zero took time", testCaseNum)
	case controlResult.Request == nil || experimentalResult.Request == nil:
		t.Fatalf("test case #%d failed: expected non-nil request", testCaseNum)
	}

	if verifyOnlyDocIDs {
		// in multi partitioned index, we cannot be sure of the score or the ordering of the hits as the tf-idf scores are localized to each partition
		// so we only check the ids
		collectIDs := func(hits []*search.DocumentMatch) map[string]struct{} {
			ids := make(map[string]struct{})
			for _, hit := range hits {
				ids[hit.ID] = struct{}{}
			}
			return ids
		}
		wantIDs, gotIDs := collectIDs(want), collectIDs(got)
		if len(wantIDs) != len(gotIDs) {
			t.Fatalf("test case #%d failed: expected %d results, got %d", testCaseNum, len(wantIDs), len(gotIDs))
		}
		for id := range wantIDs {
			if _, found := gotIDs[id]; !found {
				t.Fatalf("test case #%d failed: expected id %s to be in experimental result", testCaseNum, id)
			}
		}
		return
	}

	for i, wantHit := range want {
		gotHit := got[i]
		if wantHit.ID != gotHit.ID {
			t.Fatalf("test case #%d failed: expected hit %d to have id %s, got %s", testCaseNum, i, wantHit.ID, gotHit.ID)
		}
		// Truncate to 6 decimal places
		actualScore := truncateScore(gotHit.Score)
		expectScore := truncateScore(wantHit.Score)
		if expectScore != actualScore {
			t.Fatalf("test case #%d failed: expected hit %d to have score %f, got %f", testCaseNum, i, expectScore, actualScore)
		}
		if !compareExplanation(wantHit.Expl, gotHit.Expl) {
			t.Fatalf("test case #%d failed: expected hit %d to have explanation %v, got %v", testCaseNum, i, wantHit.Expl, gotHit.Expl)
		}
	}
	if truncateScore(controlResult.MaxScore) != truncateScore(experimentalResult.MaxScore) {
		t.Fatalf("test case #%d: expected maxScore to be %f, got %f", testCaseNum, controlResult.MaxScore, experimentalResult.MaxScore)
	}
}
// TestSimilaritySearchMultipleSegments checks that a kNN search gives the same
// answer whether the index was filled with a single batch or with several
// batches applied concurrently.
//
// For every test case and every entry of knnOperators, a control index is
// built with one batch and searched. Cases with scoreValue "none" additionally
// repeat the search on the control index with Score set to "none" and compare
// only the hit IDs. Then an experimental index is built with numSegments
// batches and its result is compared in full against the control result.
func TestSimilaritySearchMultipleSegments(t *testing.T) {
	// using scorch options to prevent merges during the course of this test
	// so that the knnCollector can be accurately tested
	scorch.DefaultMemoryPressurePauseThreshold = 0
	scorch.DefaultMinSegmentsForInMemoryMerge = math.MaxInt

	dataset, searchRequests, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	documents := makeDatasetIntoDocuments(dataset)

	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Analyzer = en.AnalyzerName

	// one vector field mapping per similarity metric
	vecFieldMappingL2 := mapping.NewVectorFieldMapping()
	vecFieldMappingL2.Dims = testDatasetDims
	vecFieldMappingL2.Similarity = index.EuclideanDistance

	vecFieldMappingDot := mapping.NewVectorFieldMapping()
	vecFieldMappingDot.Dims = testDatasetDims
	vecFieldMappingDot.Similarity = index.InnerProduct

	vecFieldMappingCosine := mapping.NewVectorFieldMapping()
	vecFieldMappingCosine.Dims = testDatasetDims
	vecFieldMappingCosine.Similarity = index.CosineSimilarity

	// one index mapping per similarity metric, all sharing the content field
	indexMappingL2Norm := NewIndexMapping()
	indexMappingL2Norm.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)
	indexMappingL2Norm.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMappingL2)

	indexMappingDotProduct := NewIndexMapping()
	indexMappingDotProduct.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)
	indexMappingDotProduct.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMappingDot)

	indexMappingCosine := NewIndexMapping()
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)
	indexMappingCosine.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMappingCosine)

	var reqSort = search.SortOrder{&search.SortScore{Desc: true}, &search.SortDocID{Desc: true}, &search.SortField{Desc: false, Field: "content"}}

	testCases := []struct {
		numSegments int
		queryIndex  int
		mapping     mapping.IndexMapping
		scoreValue  string
	}{
		// L2 norm similarity
		{numSegments: 6, queryIndex: 0, mapping: indexMappingL2Norm},
		{numSegments: 7, queryIndex: 1, mapping: indexMappingL2Norm},
		{numSegments: 8, queryIndex: 2, mapping: indexMappingL2Norm},
		{numSegments: 9, queryIndex: 3, mapping: indexMappingL2Norm},
		{numSegments: 10, queryIndex: 4, mapping: indexMappingL2Norm},
		{numSegments: 11, queryIndex: 5, mapping: indexMappingL2Norm},
		// dot_product similarity
		{numSegments: 6, queryIndex: 0, mapping: indexMappingDotProduct},
		{numSegments: 7, queryIndex: 1, mapping: indexMappingDotProduct},
		{numSegments: 8, queryIndex: 2, mapping: indexMappingDotProduct},
		{numSegments: 9, queryIndex: 3, mapping: indexMappingDotProduct},
		{numSegments: 10, queryIndex: 4, mapping: indexMappingDotProduct},
		{numSegments: 11, queryIndex: 5, mapping: indexMappingDotProduct},
		// cosine similarity
		{numSegments: 9, queryIndex: 0, mapping: indexMappingCosine},
		{numSegments: 5, queryIndex: 1, mapping: indexMappingCosine},
		{numSegments: 4, queryIndex: 2, mapping: indexMappingCosine},
		{numSegments: 12, queryIndex: 3, mapping: indexMappingCosine},
		{numSegments: 7, queryIndex: 4, mapping: indexMappingCosine},
		{numSegments: 11, queryIndex: 5, mapping: indexMappingCosine},
		// score none test
		{numSegments: 3, queryIndex: 0, mapping: indexMappingL2Norm, scoreValue: "none"},
		{numSegments: 7, queryIndex: 1, mapping: indexMappingL2Norm, scoreValue: "none"},
		{numSegments: 8, queryIndex: 2, mapping: indexMappingL2Norm, scoreValue: "none"},
		{numSegments: 3, queryIndex: 0, mapping: indexMappingDotProduct, scoreValue: "none"},
		{numSegments: 7, queryIndex: 1, mapping: indexMappingDotProduct, scoreValue: "none"},
		{numSegments: 8, queryIndex: 2, mapping: indexMappingDotProduct, scoreValue: "none"},
		{numSegments: 3, queryIndex: 0, mapping: indexMappingCosine, scoreValue: "none"},
		{numSegments: 7, queryIndex: 1, mapping: indexMappingCosine, scoreValue: "none"},
		{numSegments: 8, queryIndex: 2, mapping: indexMappingCosine, scoreValue: "none"},
	}

	for testCaseNum, testCase := range testCases {
		originalRequest := searchRequests[testCase.queryIndex]
		for _, operator := range knnOperators {
			// run single segment test first
			controlPath := createTmpIndexPath(t)
			controlIndex, err := New(controlPath, testCase.mapping)
			if err != nil {
				t.Fatal(err)
			}
			controlQuery := copySearchRequest(originalRequest, nil)
			controlQuery.Sort = reqSort.Copy()
			controlQuery.AddKNNOperator(operator)
			controlQuery.Explain = true

			controlIndexes := map[string]Index{controlIndex.Name(): controlIndex}
			if err = createMultipleSegmentsIndex(documents, controlIndex, 1); err != nil {
				cleanUp(t, controlIndexes)
				t.Fatal(err)
			}
			controlResult, err := controlIndex.Search(controlQuery)
			if err != nil {
				cleanUp(t, controlIndexes)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(controlResult.Hits, controlIndexes) {
				cleanUp(t, controlIndexes)
				t.Fatalf("test case #%d failed: expected control result hits to have valid `Index`", testCaseNum)
			}

			if testCase.scoreValue == "none" {
				// same query against the same index, but with scoring switched off
				noScoreQuery := copySearchRequest(originalRequest, nil)
				noScoreQuery.Sort = reqSort.Copy()
				noScoreQuery.AddKNNOperator(operator)
				noScoreQuery.Explain = true
				noScoreQuery.Score = testCase.scoreValue

				expectedResultScoreNone, err := controlIndex.Search(noScoreQuery)
				if err != nil {
					cleanUp(t, controlIndexes)
					t.Fatal(err)
				}
				if !finalHitsHaveValidIndex(expectedResultScoreNone.Hits, controlIndexes) {
					cleanUp(t, controlIndexes)
					t.Fatalf("test case #%d failed: expected score none hits to have valid `Index`", testCaseNum)
				}
				verifyResult(t, controlResult, expectedResultScoreNone, testCaseNum, true)
			}
			cleanUp(t, controlIndexes)

			// run multiple segments test
			experimentalPath := createTmpIndexPath(t)
			experimentalIndex, err := New(experimentalPath, testCase.mapping)
			if err != nil {
				t.Fatal(err)
			}
			experimentalIndexes := map[string]Index{experimentalIndex.Name(): experimentalIndex}
			if err = createMultipleSegmentsIndex(documents, experimentalIndex, testCase.numSegments); err != nil {
				cleanUp(t, experimentalIndexes)
				t.Fatal(err)
			}

			experimentalQuery := copySearchRequest(originalRequest, nil)
			experimentalQuery.Sort = reqSort.Copy()
			experimentalQuery.AddKNNOperator(operator)
			experimentalQuery.Explain = true

			experimentalResult, err := experimentalIndex.Search(experimentalQuery)
			if err != nil {
				cleanUp(t, experimentalIndexes)
				t.Fatal(err)
			}
			if !finalHitsHaveValidIndex(experimentalResult.Hits, experimentalIndexes) {
				cleanUp(t, experimentalIndexes)
				t.Fatalf("test case #%d failed: expected experimental result hits to have valid `Index`", testCaseNum)
			}
			verifyResult(t, controlResult, experimentalResult, testCaseNum, false)
			cleanUp(t, experimentalIndexes)
		}
	}
}
// Test to determine the impact of boost on kNN queries.
//
// The same kNN query is run once with boost 1.0 and once with boost 10.0.
// Every hit of the boosted run must score exactly ten times what the same
// document scored in the unboosted run.
func TestKNNScoreBoosting(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	const (
		dims    = 5
		numDocs = 100
	)
	getRandomVector := func() []float32 {
		vec := make([]float32, dims)
		for i := 0; i < dims; i++ {
			vec[i] = rand.Float32()
		}
		return vec
	}

	// Start from length zero with reserved capacity. A non-zero length here
	// would leave nil documents in front of the appended ones, and those nil
	// documents would then be indexed as well.
	dataset := make([]map[string]interface{}, 0, numDocs)

	// Indexing just a few docs to populate index.
	for i := 0; i < numDocs; i++ {
		dataset = append(dataset, map[string]interface{}{
			"type":    "vectorStuff",
			"content": strconv.Itoa(i),
			"vector":  getRandomVector(),
		})
	}

	indexMapping := NewIndexMapping()
	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"
	documentMapping := NewDocumentMapping()
	indexMapping.AddDocumentMapping("vectorStuff", documentMapping)

	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Index = true
	contentFieldMapping.Store = true
	documentMapping.AddFieldMappingsAt("content", contentFieldMapping)

	vecFieldMapping := mapping.NewVectorFieldMapping()
	vecFieldMapping.Index = true
	vecFieldMapping.Dims = dims // must match the length of the generated vectors
	vecFieldMapping.Similarity = "dot_product"
	documentMapping.AddFieldMappingsAt("vector", vecFieldMapping)

	index, err := New(tmpIndexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch := index.NewBatch()
	for i := 0; i < len(dataset); i++ {
		err = batch.Index(strconv.Itoa(i), dataset[i])
		if err != nil {
			t.Fatal(err)
		}
	}
	err = index.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	// Both runs use the same query vector so that only the boost differs.
	queryVec := getRandomVector()

	searchRequest := NewSearchRequest(NewMatchNoneQuery())
	searchRequest.AddKNN("vector", queryVec, 3, 1.0)
	searchRequest.Fields = []string{"content", "vector"}
	hits, err := index.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	hitsMap := make(map[string]float64, len(hits.Hits))
	for _, hit := range hits.Hits {
		hitsMap[hit.ID] = hit.Score
	}

	searchRequest = NewSearchRequest(NewMatchNoneQuery())
	searchRequest.AddKNN("vector", queryVec, 3, 10.0)
	searchRequest.Fields = []string{"content", "vector"}
	hits, err = index.Search(searchRequest)
	if err != nil {
		t.Fatal(err)
	}
	hitsMap2 := make(map[string]float64, len(hits.Hits))
	for _, hit := range hits.Hits {
		hitsMap2[hit.ID] = hit.Score
	}

	// A boosted hit that is missing from the first run reads as score 0 from
	// hitsMap and is therefore reported as a mismatch too.
	for _, hit := range hits.Hits {
		if hitsMap[hit.ID] != hitsMap2[hit.ID]/10 {
			t.Errorf("boosting not working: %v %v \n", hitsMap[hit.ID], hitsMap2[hit.ID])
		}
	}
}
// Test to see if KNN Operators get added right to the query.
//
// A request with two kNN clauses is turned into a query via createKNNQuery
// for the AND operator, the OR operator and an unknown operator. For both
// valid operators the test expects a *query.DisjunctionQuery holding one
// disjunct per kNN clause; the unknown operator must produce an error.
func TestKNNOperator(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	const (
		dims    = 5
		numDocs = 10
	)
	getRandomVector := func() []float32 {
		vec := make([]float32, dims)
		for i := 0; i < dims; i++ {
			vec[i] = rand.Float32()
		}
		return vec
	}

	// Start from length zero with reserved capacity. A non-zero length here
	// would leave nil documents in front of the appended ones, and those nil
	// documents would then be indexed as well.
	dataset := make([]map[string]interface{}, 0, numDocs)

	// Indexing just a few docs to populate index.
	for i := 0; i < numDocs; i++ {
		dataset = append(dataset, map[string]interface{}{
			"type":    "vectorStuff",
			"content": strconv.Itoa(i),
			"vector":  getRandomVector(),
		})
	}

	indexMapping := NewIndexMapping()
	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"
	documentMapping := NewDocumentMapping()
	indexMapping.AddDocumentMapping("vectorStuff", documentMapping)

	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Index = true
	contentFieldMapping.Store = true
	documentMapping.AddFieldMappingsAt("content", contentFieldMapping)

	vecFieldMapping := mapping.NewVectorFieldMapping()
	vecFieldMapping.Index = true
	vecFieldMapping.Dims = dims // must match the length of the generated vectors
	vecFieldMapping.Similarity = "dot_product"
	documentMapping.AddFieldMappingsAt("vector", vecFieldMapping)

	index, err := New(tmpIndexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err := index.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	batch := index.NewBatch()
	for i := 0; i < len(dataset); i++ {
		err = batch.Index(strconv.Itoa(i), dataset[i])
		if err != nil {
			t.Fatal(err)
		}
	}
	err = index.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	termQuery := query.NewTermQuery("2")

	searchRequest := NewSearchRequest(termQuery)
	searchRequest.AddKNN("vector", getRandomVector(), 3, 2.0)
	searchRequest.AddKNN("vector", getRandomVector(), 2, 1.5)
	searchRequest.Fields = []string{"content", "vector"}

	// Conjunction
	// NOTE(review): a disjunction query is expected here even for the AND
	// operator; presumably the operator is applied at a later stage - confirm
	// against createKNNQuery.
	searchRequest.AddKNNOperator(knnOperatorAnd)
	conjunction, _, _, err := createKNNQuery(searchRequest, nil)
	if err != nil {
		t.Fatalf("unexpected error for AND knn operator")
	}
	conj, ok := conjunction.(*query.DisjunctionQuery)
	if !ok {
		t.Fatalf("expected disjunction query")
	}
	if len(conj.Disjuncts) != 2 {
		t.Fatalf("expected 2 disjuncts")
	}

	// Disjunction
	searchRequest.AddKNNOperator(knnOperatorOr)
	disjunction, _, _, err := createKNNQuery(searchRequest, nil)
	if err != nil {
		t.Fatalf("unexpected error for OR knn operator")
	}
	disj, ok := disjunction.(*query.DisjunctionQuery)
	if !ok {
		t.Fatalf("expected disjunction query")
	}
	if len(disj.Disjuncts) != 2 {
		t.Fatalf("expected 2 disjuncts")
	}

	// Incorrect operator.
	searchRequest.AddKNNOperator("bs_op")
	searchRequest.Query, _, _, err = createKNNQuery(searchRequest, nil)
	if err == nil {
		t.Fatalf("expected error for incorrect knn operator")
	}
}
// TestKNNFiltering checks that a kNN search with a filter query only returns
// documents that the filter query itself matches. Three filters are tried: a
// single term query, a match_none query (which must yield no hits at all) and
// a disjunction of two term queries.
func TestKNNFiltering(t *testing.T) {
	indexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexPath)

	const dims = 5
	getRandomVector := func() []float32 {
		v := make([]float32, dims)
		for d := range v {
			v[d] = rand.Float32()
		}
		return v
	}

	// Indexing just a few docs to populate index.
	var dataset []map[string]interface{}
	for n := 0; n < 10; n++ {
		dataset = append(dataset, map[string]interface{}{
			"type":    "vectorStuff",
			"content": strconv.Itoa(n + 1000),
			"vector":  getRandomVector(),
		})
	}

	indexMapping := NewIndexMapping()
	indexMapping.TypeField = "type"
	indexMapping.DefaultAnalyzer = "en"
	documentMapping := NewDocumentMapping()
	indexMapping.AddDocumentMapping("vectorStuff", documentMapping)

	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Index = true
	contentFieldMapping.Store = true
	documentMapping.AddFieldMappingsAt("content", contentFieldMapping)

	vecFieldMapping := mapping.NewVectorFieldMapping()
	vecFieldMapping.Index = true
	vecFieldMapping.Dims = 5
	vecFieldMapping.Similarity = "dot_product"
	documentMapping.AddFieldMappingsAt("vector", vecFieldMapping)

	idx, err := New(indexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	batch := idx.NewBatch()
	for docNum, doc := range dataset {
		// the id of term "i" is (i-1000)
		if err := batch.Index(strconv.Itoa(docNum), doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// matchingIDs runs q as an ordinary search and collects the IDs of its hits.
	matchingIDs := func(q query.Query) map[string]struct{} {
		res, err := idx.Search(NewSearchRequest(q))
		if err != nil {
			t.Fatal(err)
		}
		ids := make(map[string]struct{})
		for _, hit := range res.Hits {
			ids[hit.ID] = struct{}{}
		}
		return ids
	}
	// assertWithin reports an error for every hit whose ID is not in allowed.
	assertWithin := func(res *SearchResult, allowed map[string]struct{}) {
		for _, hit := range res.Hits {
			if _, found := allowed[hit.ID]; !found {
				t.Errorf("returned result not present in filtered hits")
			}
		}
	}

	// Term filter.
	termFilter := query.NewTermQuery("1004")
	allowedByTerm := matchingIDs(termFilter)

	knnRequest := NewSearchRequest(NewMatchNoneQuery())
	knnRequest.AddKNNWithFilter("vector", getRandomVector(), 3, 2.0, termFilter)
	knnRequest.Fields = []string{"content", "vector"}
	knnResult, err := idx.Search(knnRequest)
	if err != nil {
		t.Fatal(err)
	}
	// check if any of the returned results are not part of the filtered hits.
	assertWithin(knnResult, allowedByTerm)

	// No results should be returned with a match_none filter.
	knnRequest = NewSearchRequest(NewMatchNoneQuery())
	knnRequest.AddKNNWithFilter("vector", getRandomVector(), 3, 2.0,
		NewMatchNoneQuery())
	knnResult, err = idx.Search(knnRequest)
	if err != nil {
		t.Fatal(err)
	}
	if len(knnResult.Hits) != 0 {
		t.Errorf("match none filter should return no hits")
	}

	// Testing with a disjunction query.
	firstTerm := query.NewTermQuery("1003")
	secondTerm := query.NewTermQuery("1005")
	disjFilter := query.NewDisjunctionQuery([]query.Query{firstTerm, secondTerm})
	allowedByDisjunction := matchingIDs(disjFilter)

	knnRequest = NewSearchRequest(NewMatchNoneQuery())
	knnRequest.AddKNNWithFilter("vector", getRandomVector(), 3, 2.0, disjFilter)
	knnRequest.Fields = []string{"content", "vector"}
	knnResult, err = idx.Search(knnRequest)
	if err != nil {
		t.Fatal(err)
	}
	assertWithin(knnResult, allowedByDisjunction)
}
// -----------------------------------------------------------------------------
// Test nested vectors
//
// TestNestedVectors indexes one document holding a single vector and one
// document holding an array of two vectors in the same field, then checks that
// a k=1 search returns the document that contains the queried vector.
func TestNestedVectors(t *testing.T) {
	indexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexPath)

	const (
		dims         = 3
		k            = 1 // one nearest neighbor
		vecFieldName = "vecData"
	)

	// docID -> Doc
	dataset := map[string]map[string]interface{}{
		"doc1": {
			vecFieldName: []float32{100, 100, 100},
		},
		"doc2": {
			vecFieldName: [][]float32{{0, 0, 0}, {1000, 1000, 1000}},
		},
	}

	// Index mapping
	vectorMapping := mapping.NewVectorFieldMapping()
	vectorMapping.Dims = dims
	vectorMapping.Similarity = "l2_norm"
	indexMapping := NewIndexMapping()
	indexMapping.DefaultMapping.AddFieldMappingsAt(vecFieldName, vectorMapping)

	// Create index and upload documents
	idx, err := New(indexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	batch := idx.NewBatch()
	for docID, doc := range dataset {
		if err := batch.Index(docID, doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// Run searches
	cases := []struct {
		queryVec      []float32
		expectedDocID string
	}{
		{queryVec: []float32{100, 100, 100}, expectedDocID: "doc1"},
		{queryVec: []float32{0, 0, 0}, expectedDocID: "doc2"},
		{queryVec: []float32{1000, 1000, 1000}, expectedDocID: "doc2"},
	}
	for _, tc := range cases {
		req := NewSearchRequest(query.NewMatchNoneQuery())
		req.AddKNNWithFilter(vecFieldName, tc.queryVec, k, 1000,
			NewMatchAllQuery())

		res, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}
		if numHits := len(res.Hits); numHits != 1 {
			t.Fatalf("expected 1 hit, got %d", numHits)
		}
		if gotID := res.Hits[0].ID; gotID != tc.expectedDocID {
			t.Fatalf("expected docID %s, got %s", tc.expectedDocID,
				gotID)
		}
	}
}
// -----------------------------------------------------------------------------
// TestMultiVector exercises kNN search over documents that carry more than one
// vector in the same field. When several vectors of one document match, the
// document must be reported once, with the score of its best matching vector.
// The covered document shapes are:
//   - Single vector field (baseline)
//   - [[]] style: array of vectors (same doc appears multiple times)
//   - [{}] style: array of objects with vector field (chunks pattern)
func TestMultiVector(t *testing.T) {
	indexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexPath)

	// JSON documents covering merger scenarios:
	// - Single vector (baseline)
	// - [[]] style: array of vectors (same doc appears multiple times)
	// - [{}] style: array of objects with vector field (chunks pattern)
	docs := map[string]string{
		// Single vector - baseline
		"doc1": `{
"vec": [10, 10, 10],
"vecB": [100, 100, 100]
}`,
		// [[]] style - array of 2 vectors
		"doc2": `{
"vec": [[0, 0, 0], [500, 500, 500]],
"vecB": [[900, 900, 900], [950, 950, 950], [975, 975, 975], [990, 990, 990]]
}`,
		// [[]] style - array of 3 vectors
		"doc3": `{
"vec": [[50, 50, 50], [200, 200, 200], [400, 400, 400]],
"vecB": [[800, 800, 800], [850, 850, 850]]
}`,
		// Single vector - baseline
		"doc4": `{
"vec": [1000, 1000, 1000],
"vecB": [1, 1, 1]
}`,
		// [{}] style - array of objects with vector field (chunks pattern)
		"doc5": `{
"chunks": [
{"vec": [10, 10, 10], "text": "chunk1"},
{"vec": [20, 20, 20], "text": "chunk2"},
{"vec": [30, 30, 30], "text": "chunk3"},
{"vec": [40, 40, 40], "text": "chunk4"}
]
}`,
		"doc6": `{
"chunks": [
{"vec": [[10, 10, 10],[20, 20, 20]], "text": "chunk1"},
{"vec": [[30, 30, 30],[40, 40, 40]], "text": "chunk2"}
]
}`,
	}

	// Parse JSON documents
	dataset := make(map[string]map[string]interface{})
	for docID, raw := range docs {
		var parsed map[string]interface{}
		if err := json.Unmarshal([]byte(raw), &parsed); err != nil {
			t.Fatalf("failed to unmarshal %s: %v", docID, err)
		}
		dataset[docID] = parsed
	}

	// Index mapping
	vecMapping := mapping.NewVectorFieldMapping()
	vecMapping.Dims = 3
	vecMapping.Similarity = index.InnerProduct
	indexMapping := NewIndexMapping()
	indexMapping.DefaultMapping.AddFieldMappingsAt("vec", vecMapping)
	indexMapping.DefaultMapping.AddFieldMappingsAt("vecB", vecMapping)

	// Nested chunks mapping for [{}] style
	chunksMapping := mapping.NewDocumentMapping()
	chunksMapping.AddFieldMappingsAt("vec", vecMapping)
	indexMapping.DefaultMapping.AddSubDocumentMapping("chunks", chunksMapping)

	// Create and populate index
	idx, err := New(indexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	batch := idx.NewBatch()
	for docID, doc := range dataset {
		if err := batch.Index(docID, doc); err != nil {
			t.Fatal(err)
		}
	}
	if err := idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// rankedHit is the document and score expected at one rank of a result.
	type rankedHit struct {
		docID         string
		expectedScore float64
	}
	// assertRanked fails the subtest unless res holds exactly the expected
	// hits, in order, with exactly the expected scores.
	assertRanked := func(t *testing.T, res *SearchResult, expectedResult []rankedHit) {
		if len(res.Hits) != len(expectedResult) {
			t.Fatalf("expected %d hits, got %d", len(expectedResult), len(res.Hits))
		}
		for i, expected := range expectedResult {
			hit := res.Hits[i]
			if hit.ID != expected.docID {
				t.Fatalf("at rank %d, expected docID %s, got %s", i+1, expected.docID, hit.ID)
			}
			if hit.Score != expected.expectedScore {
				t.Fatalf("at rank %d, expected score %v, got %v", i+1, expected.expectedScore, hit.Score)
			}
		}
	}

	// Test: Single KNN query - basic functionality
	t.Run("VecFieldSingle", func(t *testing.T) {
		req := NewSearchRequest(query.NewMatchNoneQuery())
		req.AddKNN("vec", []float32{1, 1, 1}, 20, 1.0)
		res, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}

		// Inner product: score = sum(query_i * doc_i)
		// doc1 vec=[10,10,10]: 1*10*3 = 30
		// doc2 vec best is [500,500,500]: 1*500*3 = 1500
		// doc3 vec best is [400,400,400]: 1*400*3 = 1200
		// doc4 vec=[1000,1000,1000]: 1*1000*3 = 3000
		assertRanked(t, res, []rankedHit{
			{docID: "doc4", expectedScore: 3000},
			{docID: "doc2", expectedScore: 1500},
			{docID: "doc3", expectedScore: 1200},
			{docID: "doc1", expectedScore: 30},
		})
	})

	// Test: Single KNN query on vecB field
	t.Run("VecBFieldSingle", func(t *testing.T) {
		req := NewSearchRequest(query.NewMatchNoneQuery())
		req.AddKNN("vecB", []float32{1000, 1000, 1000}, 20, 1.0)
		res, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}

		// Inner product: score = sum(query_i * doc_i) for each dimension
		// doc1: vecB=[100,100,100] -> 1000*100*3 = 300,000
		// doc2: vecB best is [990,990,990] -> 1000*990*3 = 2,970,000
		// doc3: vecB best is [850,850,850] -> 1000*850*3 = 2,550,000
		// doc4: vecB=[1,1,1] -> 1000*1*3 = 3,000
		assertRanked(t, res, []rankedHit{
			{docID: "doc2", expectedScore: 2970000},
			{docID: "doc3", expectedScore: 2550000},
			{docID: "doc1", expectedScore: 300000},
			{docID: "doc4", expectedScore: 3000},
		})
	})

	// Test: Single KNN query on nested chunks.vec field
	t.Run("ChunksVecFieldSingle", func(t *testing.T) {
		req := NewSearchRequest(query.NewMatchNoneQuery())
		req.AddKNN("chunks.vec", []float32{1, 1, 1}, 20, 1.0)
		req.SortBy([]string{"_score", "docID"})
		res, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}

		// Only doc5 and doc6 have chunks.vec
		// doc5 chunks: [10,10,10], [20,20,20], [30,30,30], [40,40,40]
		// Best score: 1*40*3 = 120
		// doc6 chunks: [[10,10,10],[20,20,20]], [[30,30,30],[40,40,40]]
		// Best score: 1*40*3 = 120
		if numHits := len(res.Hits); numHits != 2 {
			t.Fatalf("expected 2 hits, got %d", numHits)
		}
		// Both should have score 120
		for _, hit := range res.Hits {
			switch hit.ID {
			case "doc5", "doc6":
			default:
				t.Fatalf("unexpected docID %s, expected doc5 or doc6", hit.ID)
			}
			if hit.Score != 120 {
				t.Fatalf("for %s, expected score 120, got %v", hit.ID, hit.Score)
			}
		}
	})
}
// TestMultiVectorCosineNormalization exercises cosine-similarity KNN search
// over a single-vector field ("vec") and a multi-vector field ("multi_vec").
//
// Three documents are indexed: doc1 and doc2 each hold one axis-aligned,
// non-unit-length vector in "vec", while doc3 holds both of those vectors as
// sub-vectors of "multi_vec". Axis-aligned queries are expected to score 1.0
// against the matching (sub-)vector and 0.0 against the orthogonal one, which
// only holds when every sub-vector is normalized on its own.
func TestMultiVectorCosineNormalization(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	const (
		dims      = 3
		tolerance = 1e-6
	)

	// A single cosine-similarity vector mapping is registered under both
	// field names.
	cosineMapping := mapping.NewVectorFieldMapping()
	cosineMapping.Dims = dims
	cosineMapping.Similarity = index.CosineSimilarity

	indexMapping := NewIndexMapping()
	indexMapping.DefaultMapping.AddFieldMappingsAt("vec", cosineMapping)
	indexMapping.DefaultMapping.AddFieldMappingsAt("multi_vec", cosineMapping)

	idx, err := New(tmpIndexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Document IDs are doc1, doc2, doc3 in slice order.
	rawDocs := []string{
		`{"vec": [3, 0, 0]}`,
		`{"vec": [0, 4, 0]}`,
		`{"multi_vec": [[3, 0, 0], [0, 4, 0]]}`,
	}
	for n := range rawDocs {
		var parsed map[string]interface{}
		if err := json.Unmarshal([]byte(rawDocs[n]), &parsed); err != nil {
			t.Fatal(err)
		}
		if err := idx.Index(fmt.Sprintf("doc%d", n+1), parsed); err != nil {
			t.Fatal(err)
		}
	}

	// knn runs a KNN-only search (k=3, boost=1.0) on the given field and
	// aborts the test if the search itself fails.
	knn := func(field string, queryVec []float32) *SearchResult {
		req := NewSearchRequest(query.NewMatchNoneQuery())
		req.AddKNN(field, queryVec, 3, 1.0)
		out, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}
		return out
	}
	// differs reports whether got is further than tolerance away from want.
	differs := func(got, want float64) bool {
		return math.Abs(got-want) > tolerance
	}

	// X-axis query on the single-vector field: doc1 matches, doc2 is orthogonal.
	res := knn("vec", []float32{1, 0, 0})
	if len(res.Hits) != 2 {
		t.Fatalf("expected 2 hits, got %d", len(res.Hits))
	}
	top, runnerUp := res.Hits[0], res.Hits[1]
	if top.ID != "doc1" {
		t.Fatalf("expected doc1 as first hit, got %s", top.ID)
	}
	if differs(top.Score, 1.0) {
		t.Fatalf("expected score 1.0, got %f", top.Score)
	}
	if runnerUp.ID != "doc2" {
		t.Fatalf("expected doc2 as second hit, got %s", runnerUp.ID)
	}
	if differs(runnerUp.Score, 0.0) {
		t.Fatalf("expected score 0.0, got %f", runnerUp.Score)
	}

	// Y-axis query on the single-vector field: the ranking flips.
	res = knn("vec", []float32{0, 1, 0})
	if len(res.Hits) != 2 {
		t.Fatalf("expected 2 hits, got %d", len(res.Hits))
	}
	top, runnerUp = res.Hits[0], res.Hits[1]
	if top.ID != "doc2" {
		t.Fatalf("expected doc2 as first hit, got %s", top.ID)
	}
	if differs(top.Score, 1.0) {
		t.Fatalf("expected score 1.0, got %f", top.Score)
	}
	if runnerUp.ID != "doc1" {
		t.Fatalf("expected doc1 as second hit, got %s", runnerUp.ID)
	}
	if differs(runnerUp.Score, 0.0) {
		t.Fatalf("expected score 0.0, got %f", runnerUp.Score)
	}

	// X-axis query on the multi-vector field: only doc3 has the field, and its
	// first sub-vector is a perfect match.
	res = knn("multi_vec", []float32{1, 0, 0})
	if len(res.Hits) != 1 {
		t.Fatalf("expected 1 hit, got %d", len(res.Hits))
	}
	if res.Hits[0].ID != "doc3" {
		t.Fatalf("expected doc3 as first hit, got %s", res.Hits[0].ID)
	}
	if differs(res.Hits[0].Score, 1.0) {
		t.Fatalf("expected score 1.0, got %f", res.Hits[0].Score)
	}

	// Y-axis query on the multi-vector field: doc3's second sub-vector is the
	// perfect match this time.
	res = knn("multi_vec", []float32{0, 1, 0})
	if len(res.Hits) != 1 {
		t.Fatalf("expected 1 hit, got %d", len(res.Hits))
	}
	if res.Hits[0].ID != "doc3" {
		t.Fatalf("expected doc3 as first hit, got %s", res.Hits[0].ID)
	}
	if differs(res.Hits[0].Score, 1.0) {
		t.Fatalf("expected score 1.0, got %f", res.Hits[0].Score)
	}
}
// TestNumVecsStat indexes 300 documents (10 batches of 30, reusing the first
// 30 dataset documents under fresh IDs in every batch) into an index with a
// Euclidean-distance vector field named "vector", and then verifies that the
// "field:vector:num_vectors" entry of the index stats reports 300.
func TestNumVecsStat(t *testing.T) {
	dataset, _, err := readDatasetAndQueries(testInputCompressedFile)
	if err != nil {
		t.Fatal(err)
	}
	documents := makeDatasetIntoDocuments(dataset)

	const (
		numBatches   = 10
		docsPerBatch = 30
	)
	// Fail with a clear message instead of an index-out-of-range panic below.
	if len(documents) < docsPerBatch {
		t.Fatalf("dataset too small: need at least %d documents, got %d", docsPerBatch, len(documents))
	}

	indexMapping := NewIndexMapping()
	contentFieldMapping := NewTextFieldMapping()
	contentFieldMapping.Analyzer = en.AnalyzerName
	indexMapping.DefaultMapping.AddFieldMappingsAt("content", contentFieldMapping)

	vecFieldMapping := mapping.NewVectorFieldMapping()
	vecFieldMapping.Dims = testDatasetDims
	vecFieldMapping.Similarity = index.EuclideanDistance
	indexMapping.DefaultMapping.AddFieldMappingsAt("vector", vecFieldMapping)

	tmpIndexPath := createTmpIndexPath(t)
	// Deferred before the Close below so that, in LIFO order, the index is
	// closed first and its directory removed afterwards.
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	for b := 0; b < numBatches; b++ {
		batch := idx.NewBatch()
		for d := 0; d < docsPerBatch; d++ {
			// IDs are unique across batches; the document bodies repeat.
			if err := batch.Index(fmt.Sprintf("%d", b*docsPerBatch+d), documents[d]); err != nil {
				t.Fatal(err)
			}
		}
		if err := idx.Batch(batch); err != nil {
			t.Fatal(err)
		}
	}

	// A missing or mistyped "index" entry must fail the test rather than let
	// it pass without checking anything.
	indexStats, ok := idx.StatsMap()["index"].(map[string]interface{})
	if !ok {
		t.Fatalf("index stats missing or of unexpected type %T", idx.StatsMap()["index"])
	}

	const statKey = "field:vector:num_vectors"
	wantVectors := uint64(numBatches * docsPerBatch)
	gotVectors, ok := indexStats[statKey].(uint64)
	if !ok || gotVectors != wantVectors {
		// %v because the raw value may be nil or not an integer at all.
		t.Fatalf("mismatch in the number of vectors, expected %d, got %v", wantVectors, indexStats[statKey])
	}
}
// TestIndexUpdateVector verifies KNN search results after an index is reopened
// with an updated mapping.
//
// The index is first created with four vector fields: "a" and "b" of type
// "vector", "c" and "d" of type "vector_base64". Three documents are indexed
// and the index is closed. It is then reopened through OpenUsing with an
// "updated_mapping" config in which "b" is absent and "d" has Index set to
// false. The test expects KNN searches on "a" and "c" to still return all
// three documents, and searches on "b" and "d" to return none.
func TestIndexUpdateVector(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// vectorProp builds the sub-document mapping for one vector property; only
	// the field type and the Index flag vary between properties in this test.
	vectorProp := func(fieldType string, indexed bool) *mapping.DocumentMapping {
		return &mapping.DocumentMapping{
			Enabled:    true,
			Dynamic:    false,
			Properties: map[string]*mapping.DocumentMapping{},
			Fields: []*mapping.FieldMapping{
				{
					Type:                    fieldType,
					Index:                   indexed,
					Dims:                    4,
					Similarity:              "l2_norm",
					VectorIndexOptimizedFor: "latency",
				},
			},
		}
	}
	// staticMapping wraps the given properties in a fully static index mapping
	// (no dynamic indexing, storing or doc values, no type mappings).
	staticMapping := func(props map[string]*mapping.DocumentMapping) *mapping.IndexMappingImpl {
		im := mapping.NewIndexMapping()
		im.TypeMapping = map[string]*mapping.DocumentMapping{}
		im.DefaultMapping = &mapping.DocumentMapping{
			Enabled:    true,
			Dynamic:    false,
			Properties: props,
			Fields:     []*mapping.FieldMapping{},
		}
		im.IndexDynamic = false
		im.StoreDynamic = false
		im.DocValuesDynamic = false
		return im
	}

	indexMappingBefore := staticMapping(map[string]*mapping.DocumentMapping{
		"a": vectorProp("vector", true),
		"b": vectorProp("vector", true),
		"c": vectorProp("vector_base64", true),
		"d": vectorProp("vector_base64", true),
	})

	idx, err := New(tmpIndexPath, indexMappingBefore)
	if err != nil {
		t.Fatal(err)
	}

	doc1 := map[string]interface{}{"a": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "b": []float32{0.32894259691238403, 0.6973215341567993, 0.6835201978683472, 0.38296082615852356}, "c": "L5MOPw7NID5SQMU9pHUoPw==", "d": "L5MOPw7NID5SQMU9pHUoPw=="}
	doc2 := map[string]interface{}{"a": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "b": []float32{0.0018692062003538013, 0.41076546907424927, 0.5675257444381714, 0.45832985639572144}, "c": "czloP94ZCD71ldY+GbAOPw==", "d": "czloP94ZCD71ldY+GbAOPw=="}
	doc3 := map[string]interface{}{"a": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "b": []float32{0.7853356599807739, 0.6904757618904114, 0.5643226504325867, 0.682637631893158}, "c": "Chh6P2lOqT47mjg/0odlPg==", "d": "Chh6P2lOqT47mjg/0odlPg=="}

	batch := idx.NewBatch()
	if err = batch.Index("001", doc1); err != nil {
		t.Fatal(err)
	}
	if err = batch.Index("002", doc2); err != nil {
		t.Fatal(err)
	}
	if err = batch.Index("003", doc3); err != nil {
		t.Fatal(err)
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}
	// The index must be closed before it can be reopened with a new mapping.
	if err = idx.Close(); err != nil {
		t.Fatal(err)
	}

	// Updated mapping: "b" is dropped entirely and "d" is no longer indexed.
	indexMappingAfter := staticMapping(map[string]*mapping.DocumentMapping{
		"a": vectorProp("vector", true),
		"c": vectorProp("vector_base64", true),
		"d": vectorProp("vector_base64", false),
	})

	mappingString, err := json.Marshal(indexMappingAfter)
	if err != nil {
		t.Fatal(err)
	}
	config := map[string]interface{}{
		"updated_mapping": string(mappingString),
	}

	idx, err = OpenUsing(tmpIndexPath, config)
	if err != nil {
		t.Fatal(err)
	}
	// Close the reopened index so that its resources are released before the
	// deferred cleanup removes the index directory.
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// expectHits runs a KNN-only search (k=3) on field and checks the number
	// of hits returned.
	expectHits := func(field string, want int) {
		t.Helper()
		req := NewSearchRequest(NewMatchNoneQuery())
		req.AddKNN(field, []float32{1, 2, 3, 4}, 3, 1.0)
		res, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}
		if len(res.Hits) != want {
			t.Fatalf("field %q: Expected %d hits, got %d", field, want, len(res.Hits))
		}
	}

	expectHits("a", 3) // unchanged field
	expectHits("b", 0) // removed from the mapping
	expectHits("c", 3) // unchanged field
	expectHits("d", 0) // Index switched to false
}
// TestIndexInsightsTermFrequencies indexes seven short sentences into a "text"
// field analyzed with the "en" analyzer and checks that
// InsightsIndex.TermFrequencies("text", 5, true) returns exactly the expected
// five term/frequency pairs, in the expected order.
func TestIndexInsightsTermFrequencies(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	englishText := mapping.NewTextFieldMapping()
	englishText.Analyzer = "en"
	mp := mapping.NewIndexMapping()
	mp.DefaultMapping.AddFieldMappingsAt("text", englishText)

	idx, err := New(tmpIndexPath, mp)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	corpus := []struct {
		id   string
		text string
	}{
		{"one", "She sells sea shells by the sea shore"},
		{"two", "The quick brown fox jumps over the lazy dog"},
		{"three", "She sold sea shells to the person with the dog"},
		{"four", "But there are a lot of dogs on the beach"},
		{"five", "To hell with the foxes"},
		{"six", "What about the dogs"},
		{"seven", "Dogs are OK, foxes are not"},
	}
	for _, entry := range corpus {
		// Each document carries its own ID as a field next to the text.
		body := map[string]string{
			"id":   entry.id,
			"text": entry.text,
		}
		// An indexing failure is reported but does not stop the loop.
		if indexErr := idx.Index(entry.id, body); indexErr != nil {
			t.Errorf("Error updating index: %v", indexErr)
		}
	}

	insightsIdx, supported := idx.(InsightsIndex)
	if !supported {
		t.Fatal("index does not support insights")
	}

	want := []index.TermFreq{
		{Term: "dog", Frequency: 5},
		{Term: "fox", Frequency: 3},
		{Term: "sea", Frequency: 2},
		{Term: "shell", Frequency: 2},
		{Term: "beach", Frequency: 1},
	}
	got, err := insightsIdx.TermFrequencies("text", 5, true)
	if err != nil {
		t.Fatal(err)
	}
	if !reflect.DeepEqual(got, want) {
		t.Fatalf("term freqs do not match: got: %v, expected: %v", got, want)
	}
}
// TestIndexInsightsCentroidCardinalities indexes 50000 random 5-dimensional
// vectors (components drawn from [-10, 10)) into a cosine-similarity vector
// field, flushing a batch every 200 documents, and then checks that
// InsightsIndex.CentroidCardinalities("vec", 5, true) returns five entries,
// each carrying a non-empty Index name.
func TestIndexInsightsCentroidCardinalities(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	const (
		totalDocs  = 50000
		flushEvery = 200
	)
	vectorDims := 5

	vecFieldMapping := mapping.NewVectorFieldMapping()
	vecFieldMapping.Dims = vectorDims
	vecFieldMapping.Similarity = index.CosineSimilarity
	mp := mapping.NewIndexMapping()
	mp.DefaultMapping.AddFieldMappingsAt("vec", vecFieldMapping)

	idx, err := New(tmpIndexPath, mp)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	rand.Seed(time.Now().UnixNano())
	lo, hi := float32(-10.0), float32(10.0)
	// randomVector returns a fresh vector with every component in [lo, hi).
	randomVector := func() []float32 {
		out := make([]float32, vectorDims)
		for d := 0; d < len(out); d++ {
			out[d] = lo + rand.Float32()*(hi-lo)
		}
		return out
	}

	batch := idx.NewBatch()
	for docNum := 1; docNum <= totalDocs; docNum++ {
		body := map[string]interface{}{
			"vec": randomVector(),
		}
		if indexErr := batch.Index(fmt.Sprintf("doc-%d", docNum), body); indexErr != nil {
			t.Fatalf("error indexing doc: %v", indexErr)
		}
		if docNum%flushEvery != 0 {
			continue
		}
		// Batch is full: flush it and start a new one.
		if batchErr := idx.Batch(batch); batchErr != nil {
			t.Fatalf("Error adding batch to index: %v", batchErr)
		}
		batch = idx.NewBatch()
	}
	// Flush whatever is left over when the document count is not a multiple
	// of the batch size.
	if batch.Size() > 0 {
		if batchErr := idx.Batch(batch); batchErr != nil {
			t.Errorf("Error adding final batch to index: %v", batchErr)
		}
	}

	insightsIdx, supported := idx.(InsightsIndex)
	if !supported {
		t.Fatal("index does not support insights")
	}

	centroids, err := insightsIdx.CentroidCardinalities("vec", 5, true)
	if err != nil {
		t.Fatal(err)
	}
	if len(centroids) != 5 {
		t.Fatalf("expected 5 centroids, got %d", len(centroids))
	}
	for c := range centroids {
		if len(centroids[c].Index) == 0 {
			t.Fatal("expected index name for each centroid")
		}
	}
}
// TestHierarchicalNestedVectorSearch compares KNN search over an array of
// sub-documents ("items") when that array is mapped as a plain sub-document
// versus as a nested document.
//
// The same three documents are indexed into two indexes. For each index the
// test runs an unfiltered KNN search on "items.embedding_vector" and a KNN
// search filtered by the term query items.type:transport. The expectations
// encoded below are:
//   - non-nested, unfiltered and filtered: identical rankings and scores;
//   - nested, unfiltered: same ranking and scores as the non-nested index;
//   - nested, filtered: a different ranking (doc2, doc1, doc3) with much lower
//     scores, i.e. it does not match the unfiltered result.
func TestHierarchicalNestedVectorSearch(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	dataset := `
[
{
"id": "doc1",
"items": [
{
"description": "I like trains",
"embedding_vector": [
1,
0,
0
],
"type": "transport"
},
{
"description": "I love pizza",
"embedding_vector": [
0,
1,
0
],
"type": "food"
}
]
},
{
"id": "doc2",
"items": [
{
"description": "I go to school by bus",
"embedding_vector": [
0.9,
0.1,
0
],
"type": "transport"
},
{
"description": "Sushi is delicious",
"embedding_vector": [
0,
1,
0
],
"type": "food"
}
]
},
{
"id": "doc3",
"items": [
{
"description": "Hamburgers are tasty",
"embedding_vector": [
0,
0.8,
0.2
],
"type": "food"
},
{
"description": "I enjoy biking",
"embedding_vector": [
0.7,
0,
0.3
],
"type": "transport"
}
]
}
]`
	var documents []map[string]interface{}
	err := json.Unmarshal([]byte(dataset), &documents)
	if err != nil {
		t.Fatalf("failed to unmarshal dataset: %v", err)
	}

	// Field mappings shared by the nested and non-nested "items" mappings.
	vecFieldMapping := mapping.NewVectorFieldMapping()
	vecFieldMapping.Dims = 3
	vecFieldMapping.Similarity = index.CosineSimilarity
	typeMapping := mapping.NewTextFieldMapping()
	typeMapping.Analyzer = keyword.Name
	descMapping := mapping.NewTextFieldMapping()
	descMapping.Analyzer = en.AnalyzerName

	// buildReq creates a KNN-only request for the Y-axis query vector, with an
	// optional filter, sorted by descending score and then by document ID.
	buildReq := func(filter query.Query) *SearchRequest {
		req := NewSearchRequest(query.NewMatchNoneQuery())
		if filter == nil {
			req.AddKNN("items.embedding_vector", []float32{0, 1, 0}, 5, 1.0)
		} else {
			req.AddKNNWithFilter("items.embedding_vector", []float32{0, 1, 0}, 5, 1.0, filter)
		}
		req.SortBy([]string{"-_score", "_id"})
		return req
	}
	// transportFilter returns a fresh term query for items.type:transport.
	transportFilter := func() query.Query {
		q := NewTermQuery("transport")
		q.SetField("items.type")
		return q
	}
	// assertRanking checks hit count, hit order and scores (within 0.01).
	assertRanking := func(res *SearchResult, wantIDs []string, wantScores []float64) {
		t.Helper()
		if len(res.Hits) != len(wantIDs) {
			t.Fatalf("expected %d hits, got %d", len(wantIDs), len(res.Hits))
		}
		for i, wantID := range wantIDs {
			if res.Hits[i].ID != wantID {
				t.Fatalf("at rank %d, expected docID %s, got %s", i+1, wantID, res.Hits[i].ID)
			}
			if math.Abs(res.Hits[i].Score-wantScores[i]) > 0.01 {
				t.Fatalf("at rank %d, expected score %.3f, got %.3f", i+1, wantScores[i], res.Hits[i].Score)
			}
		}
	}

	// ---- items is NOT nested ----
	itemsMapping := mapping.NewDocumentMapping()
	itemsMapping.AddFieldMappingsAt("embedding_vector", vecFieldMapping)
	itemsMapping.AddFieldMappingsAt("type", typeMapping)
	itemsMapping.AddFieldMappingsAt("description", descMapping)
	indexMapping := NewIndexMapping()
	indexMapping.DefaultMapping.AddSubDocumentMapping("items", itemsMapping)

	idx, err := New(tmpIndexPath, indexMapping)
	if err != nil {
		t.Fatalf("failed to create index: %v", err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatalf("failed to close index: %v", err)
		}
	}()

	batch := idx.NewBatch()
	for _, doc := range documents {
		if err := batch.Index(doc["id"].(string), doc); err != nil {
			t.Fatalf("failed to index document %s: %v", doc["id"], err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatalf("failed to batch index documents: %v", err)
	}

	expectedOrder := []string{"doc1", "doc2", "doc3"}
	expectedScores := []float64{1.0, 1.0, 0.970}

	// Plain vector search
	res, err := idx.Search(buildReq(nil))
	if err != nil {
		t.Fatalf("failed to execute search: %v", err)
	}
	assertRanking(res, expectedOrder, expectedScores)

	// Filtered vector search - should match output of plain vector search in
	// the non-nested case
	res, err = idx.Search(buildReq(transportFilter()))
	if err != nil {
		t.Fatalf("failed to execute filtered search: %v", err)
	}
	assertRanking(res, expectedOrder, expectedScores)

	// ---- items IS nested ----
	nestedItemsMapping := mapping.NewNestedDocumentMapping()
	nestedItemsMapping.AddFieldMappingsAt("embedding_vector", vecFieldMapping)
	nestedItemsMapping.AddFieldMappingsAt("type", typeMapping)
	nestedItemsMapping.AddFieldMappingsAt("description", descMapping)
	indexMappingNested := NewIndexMapping()
	indexMappingNested.DefaultMapping.AddSubDocumentMapping("items", nestedItemsMapping)

	// The nested index lives in its own directory, which needs its own
	// cleanup. The cleanup is deferred before the Close below so that the
	// index is closed first and the directory removed afterwards.
	nestedIndexPath := tmpIndexPath + "_nested"
	defer cleanupTmpIndexPath(t, nestedIndexPath)

	idxNested, err := New(nestedIndexPath, indexMappingNested)
	if err != nil {
		t.Fatalf("failed to create nested index: %v", err)
	}
	defer func() {
		if err := idxNested.Close(); err != nil {
			t.Fatalf("failed to close nested index: %v", err)
		}
	}()

	batch = idxNested.NewBatch()
	for _, doc := range documents {
		if err := batch.Index(doc["id"].(string), doc); err != nil {
			t.Fatalf("failed to index document %s in nested index: %v", doc["id"], err)
		}
	}
	if err = idxNested.Batch(batch); err != nil {
		t.Fatalf("failed to batch index documents in nested index: %v", err)
	}

	// Plain vector search on nested index - exact same behavior as non-nested
	res, err = idxNested.Search(buildReq(nil))
	if err != nil {
		t.Fatalf("failed to execute search on nested index: %v", err)
	}
	assertRanking(res, expectedOrder, expectedScores)

	// Filtered vector search on nested index - should NOT match output of
	// plain vector search in the nested case
	res, err = idxNested.Search(buildReq(transportFilter()))
	if err != nil {
		t.Fatalf("failed to execute filtered search on nested index: %v", err)
	}
	expectedNestedOrder := []string{"doc2", "doc1", "doc3"}
	expectedNestedScores := []float64{0.110, 0, 0}
	assertRanking(res, expectedNestedOrder, expectedNestedScores)
}
================================================
FILE: search_nested_test.go
================================================
// Copyright (c) 2026 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"encoding/json"
"fmt"
"testing"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/ansi"
"github.com/blevesearch/bleve/v2/search/query"
)
// createNestedIndexMapping builds the index mapping shared by the nested-search
// tests. A "company" sub-document is attached to the default mapping; it has
// plain text fields plus nested "departments" (which themselves contain nested
// "employees" and "projects") and nested "locations".
func createNestedIndexMapping() mapping.IndexMapping {
	/*
		company
		├── id
		├── name
		├── departments[] (nested)
		│   ├── name
		│   ├── budget
		│   ├── employees[] (nested)
		│   │   ├── name
		│   │   ├── role
		│   └── projects[] (nested)
		│       ├── title
		│       ├── status
		└── locations[] (nested)
		    ├── city
		    ├── country
	*/

	// addTextFields registers a fresh default text field mapping under each of
	// the given property names.
	addTextFields := func(dm *mapping.DocumentMapping, names ...string) {
		for _, name := range names {
			dm.AddFieldMappingsAt(name, mapping.NewTextFieldMapping())
		}
	}

	// Innermost nested levels first: employees and projects.
	employees := mapping.NewNestedDocumentMapping()
	addTextFields(employees, "name", "role")

	projects := mapping.NewNestedDocumentMapping()
	addTextFields(projects, "title", "status")

	// Departments: text name, numeric budget, plus the two nested children.
	departments := mapping.NewNestedDocumentMapping()
	addTextFields(departments, "name")
	departments.AddFieldMappingsAt("budget", mapping.NewNumericFieldMapping())
	departments.AddSubDocumentMapping("employees", employees)
	departments.AddSubDocumentMapping("projects", projects)

	// Locations: a sibling nested level next to departments.
	locations := mapping.NewNestedDocumentMapping()
	addTextFields(locations, "city", "country")

	// Company: a regular (non-nested) sub-document holding everything above.
	company := mapping.NewDocumentMapping()
	addTextFields(company, "id", "name")
	company.AddSubDocumentMapping("departments", departments)
	company.AddSubDocumentMapping("locations", locations)

	imap := mapping.NewIndexMapping()
	imap.DefaultMapping.AddSubDocumentMapping("company", company)
	return imap
}
// TestNestedPrefixes checks mapping.NestedMapping.NestedDepth against the
// mapping from createNestedIndexMapping. For each set of field paths it
// compares the returned (common, max) pair with the expected values; the
// cases run in order and the first mismatch aborts the test.
func TestNestedPrefixes(t *testing.T) {
	imap := createNestedIndexMapping()

	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	nmap, isNested := imap.(mapping.NestedMapping)
	if !isNested {
		t.Fatal("index mapping is not a NestedMapping")
	}

	// Cases are numbered from 1 in failure messages, in slice order.
	cases := []struct {
		fields     []string
		wantCommon int
		wantMax    int
	}{
		// Test 1: Employee Role AND Employee Name
		{
			fields: []string{
				"company.departments.employees.role",
				"company.departments.employees.name",
			},
			wantCommon: 2,
			wantMax:    2,
		},
		// Test 2: Employee Role AND Employee Name AND Department Name
		{
			fields: []string{
				"company.departments.employees.role",
				"company.departments.employees.name",
				"company.departments.name",
			},
			wantCommon: 1,
			wantMax:    2, // employees nested deeper
		},
		// Test 3: Employee Role AND Location City
		{
			fields: []string{
				"company.departments.employees.role",
				"company.locations.city",
			},
			wantCommon: 0,
			wantMax:    2, // employees deeper than locations (1)
		},
		// Test 4: Company Name AND Location Country (and Location City)
		{
			fields: []string{
				"company.name",
				"company.locations.country",
				"company.locations.city",
			},
			wantCommon: 0,
			wantMax:    1, // locations.country and locations.city share depth 1
		},
		// Test 5: Department Budget AND Project Status AND Employee Name
		{
			fields: []string{
				"company.departments.budget",
				"company.departments.projects.status",
				"company.departments.employees.name",
			},
			wantCommon: 1,
			wantMax:    2, // employees + projects go deeper
		},
		// Test 6: Single Field
		{
			fields:     []string{"company.id"},
			wantCommon: 0,
			wantMax:    0,
		},
		// Test 7: No Fields
		{
			fields:     nil,
			wantCommon: 0,
			wantMax:    0,
		},
		// Test 8: All Fields
		{
			fields: []string{
				"company.id",
				"company.name",
				"company.departments.name",
				"company.departments.budget",
				"company.departments.employees.name",
				"company.departments.employees.role",
				"company.departments.projects.title",
				"company.departments.projects.status",
				"company.locations.city",
				"company.locations.country",
			},
			wantCommon: 0, // spans different contexts
			wantMax:    2,
		},
		// Test 9: Project Title AND Project Status
		{
			fields: []string{
				"company.departments.projects.title",
				"company.departments.projects.status",
			},
			wantCommon: 2,
			wantMax:    2,
		},
		// Test 10: Department Name AND Location Country (and Location City)
		{
			fields: []string{
				"company.departments.name",
				"company.locations.country",
				"company.locations.city",
			},
			wantCommon: 0,
			wantMax:    1, // locations share depth 1
		},
	}

	for i, tc := range cases {
		fs := search.NewFieldSet()
		for _, path := range tc.fields {
			fs.AddField(path)
		}
		gotCommon, gotMax := nmap.NestedDepth(fs)
		if gotCommon != tc.wantCommon || gotMax != tc.wantMax {
			t.Fatalf("Test%d: expected (common=%d, max=%d), got (common=%d, max=%d)",
				i+1, tc.wantCommon, tc.wantMax, gotCommon, gotMax)
		}
	}
}
func TestNestedConjunctionQuery(t *testing.T) {
imap := createNestedIndexMapping()
err := imap.Validate()
if err != nil {
t.Fatalf("expected valid nested index mapping, got error: %v", err)
}
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
idx, err := New(tmpIndexPath, imap)
if err != nil {
t.Fatal(err)
}
defer func() {
err = idx.Close()
if err != nil {
t.Fatal(err)
}
}()
// Index 3 sample documents
docs := []struct {
id string
data string
}{
{
id: "doc1",
data: `{
"company": {
"id": "c1",
"name": "TechCorp",
"departments": [
{
"name": "Engineering",
"budget": 2000000,
"employees": [
{"name": "Alice", "role": "Engineer"},
{"name": "Bob", "role": "Manager"}
],
"projects": [
{"title": "Project X", "status": "ongoing"},
{"title": "Project Y", "status": "completed"}
]
},
{
"name": "Sales",
"budget": 300000,
"employees": [
{"name": "Eve", "role": "Salesperson"},
{"name": "Mallory", "role": "Manager"}
],
"projects": [
{"title": "Project A", "status": "completed"},
{"title": "Project B", "status": "ongoing"}
]
}
],
"locations": [
{"city": "Athens", "country": "Greece"},
{"city": "Berlin", "country": "USA"}
]
}
}`,
},
{
id: "doc2",
data: `{
"company" : {
"id": "c2",
"name": "BizInc",
"departments": [
{
"name": "Marketing",
"budget": 800000,
"employees": [
{"name": "Eve", "role": "Marketer"},
{"name": "David", "role": "Manager"}
],
"projects": [
{"title": "Project Z", "status": "ongoing"},
{"title": "Project W", "status": "planned"}
]
},
{
"name": "Engineering",
"budget": 800000,
"employees": [
{"name": "Frank", "role": "Manager"},
{"name": "Grace", "role": "Engineer"}
],
"projects": [
{"title": "Project Alpha", "status": "completed"},
{"title": "Project Beta", "status": "ongoing"}
]
}
],
"locations": [
{"city": "Athens", "country": "USA"},
{"city": "London", "country": "UK"}
]
}
}`,
},
{
id: "doc3",
data: `{
"company": {
"id": "c3",
"name": "WebSolutions",
"departments": [
{
"name": "HR",
"budget": 800000,
"employees": [
{"name": "Eve", "role": "Manager"},
{"name": "Frank", "role": "HR"}
],
"projects": [
{"title": "Project Beta", "status": "completed"},
{"title": "Project B", "status": "ongoing"}
]
},
{
"name": "Engineering",
"budget": 200000,
"employees": [
{"name": "Heidi", "role": "Support Engineer"},
{"name": "Ivan", "role": "Manager"}
],
"projects": [
{"title": "Project Helpdesk", "status": "ongoing"},
{"title": "Project FAQ", "status": "completed"}
]
}
],
"locations": [
{"city": "Edinburgh", "country": "UK"},
{"city": "London", "country": "Canada"}
]
}
}`,
},
}
for _, doc := range docs {
var dataMap map[string]interface{}
err := json.Unmarshal([]byte(doc.data), &dataMap)
if err != nil {
t.Fatalf("failed to unmarshal document %s: %v", doc.id, err)
}
err = idx.Index(doc.id, dataMap)
if err != nil {
t.Fatalf("failed to index document %s: %v", doc.id, err)
}
}
var buildReq = func(subQueries []query.Query) *SearchRequest {
rv := NewSearchRequest(query.NewConjunctionQuery(subQueries))
rv.SortBy([]string{"_id"})
rv.Fields = []string{"*"}
rv.Highlight = NewHighlightWithStyle(ansi.Name)
return rv
}
var (
req *SearchRequest
res *SearchResult
deptNameQuery *query.MatchQuery
deptBudgetQuery *query.NumericRangeQuery
empNameQuery *query.MatchQuery
empRoleQuery *query.MatchQuery
projTitleQuery *query.MatchPhraseQuery
projStatusQuery *query.MatchQuery
countryQuery *query.MatchQuery
cityQuery *query.MatchQuery
)
// Test 1: Find companies with a department named "Engineering" AND budget of at least 800000
deptNameQuery = query.NewMatchQuery("Engineering")
deptNameQuery.SetField("company.departments.name")
min := float64(800000)
deptBudgetQuery = query.NewNumericRangeQuery(&min, nil)
deptBudgetQuery.SetField("company.departments.budget")
req = buildReq([]query.Query{deptNameQuery, deptBudgetQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 2 {
t.Fatalf("expected 2 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc1" || res.Hits[1].ID != "doc2" {
t.Fatalf("unexpected hit IDs: %v, %v", res.Hits[0].ID, res.Hits[1].ID)
}
// Test 2: Find companies with an employee named "Eve" AND project status "completed"
empNameQuery = query.NewMatchQuery("Eve")
empNameQuery.SetField("company.departments.employees.name")
projStatusQuery = query.NewMatchQuery("completed")
projStatusQuery.SetField("company.departments.projects.status")
req = buildReq([]query.Query{empNameQuery, projStatusQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 2 {
t.Fatalf("expected 2 hits, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc1" || res.Hits[1].ID != "doc3" {
t.Fatalf("unexpected hit IDs: %v, %v", res.Hits[0].ID, res.Hits[1].ID)
}
// Test 3: Find companies located in "Athens, USA" AND with an Engineering department
countryQuery = query.NewMatchQuery("USA")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("Athens")
cityQuery.SetField("company.locations.city")
locQuery := query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
deptNameQuery = query.NewMatchQuery("Engineering")
deptNameQuery.SetField("company.departments.name")
req = buildReq([]query.Query{locQuery, deptNameQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc2" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 4a: Find companies located in "Athens, USA" AND with an Engineering department with a budget over 1M
countryQuery = query.NewMatchQuery("USA")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("Athens")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
deptNameQuery = query.NewMatchQuery("Engineering")
deptNameQuery.SetField("company.departments.name")
min = float64(1000000)
deptBudgetQuery = query.NewNumericRangeQuery(&min, nil)
deptBudgetQuery.SetField("company.departments.budget")
deptQuery := query.NewConjunctionQuery([]query.Query{deptNameQuery, deptBudgetQuery})
req = buildReq([]query.Query{locQuery, deptQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hits, got %d", len(res.Hits))
}
// Test 4b: Find companies located in "Athens, Greece" AND with an Engineering department with a budget over 1M
countryQuery = query.NewMatchQuery("Greece")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("Athens")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
deptNameQuery = query.NewMatchQuery("Engineering")
deptNameQuery.SetField("company.departments.name")
min = float64(1000000)
deptBudgetQuery = query.NewNumericRangeQuery(&min, nil)
deptBudgetQuery.SetField("company.departments.budget")
deptQuery = query.NewConjunctionQuery([]query.Query{deptNameQuery, deptBudgetQuery})
req = buildReq([]query.Query{locQuery, deptQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hits, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc1" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 5a: Find companies with an employee named "Frank" AND role "Manager" whose department is
// handling a project titled "Project Beta" which is marked as "completed"
empNameQuery = query.NewMatchQuery("Frank")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery := query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
projTitleQuery = query.NewMatchPhraseQuery("Project Beta")
projTitleQuery.SetField("company.departments.projects.title")
projStatusQuery = query.NewMatchQuery("completed")
projStatusQuery.SetField("company.departments.projects.status")
projQuery := query.NewConjunctionQuery([]query.Query{projTitleQuery, projStatusQuery})
req = buildReq([]query.Query{empQuery, projQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hit, got %d", len(res.Hits))
}
// Test 5b: Find companies with an employee named "Frank" AND role "Manager" whose department is
// handling a project titled "Project Beta" which is marked as "ongoing"
empNameQuery = query.NewMatchQuery("Frank")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
projTitleQuery = query.NewMatchPhraseQuery("Project Beta")
projTitleQuery.SetField("company.departments.projects.title")
projStatusQuery = query.NewMatchQuery("ongoing")
projStatusQuery.SetField("company.departments.projects.status")
projQuery = query.NewConjunctionQuery([]query.Query{projTitleQuery, projStatusQuery})
req = buildReq([]query.Query{empQuery, projQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc2" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 6a: Find companies with an employee named "Eve" AND role "Manager"
// who is working in a department located in "London, UK"
empNameQuery = query.NewMatchQuery("Eve")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
countryQuery = query.NewMatchQuery("UK")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
req = buildReq([]query.Query{empQuery, locQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hit, got %d", len(res.Hits))
}
// Test 6b: Find companies with an employee named "Eve" AND role "Manager"
// who is working in a department located in "London, Canada"
empNameQuery = query.NewMatchQuery("Eve")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
countryQuery = query.NewMatchQuery("Canada")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
req = buildReq([]query.Query{empQuery, locQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc3" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 7a: Find companies where Ivan the Manager works London, UK
empNameQuery = query.NewMatchQuery("Ivan")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
countryQuery = query.NewMatchQuery("UK")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
req = buildReq([]query.Query{empQuery, locQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hit, got %d", len(res.Hits))
}
// Test 7b: Find companies where Ivan the Manager works London, Canada
empNameQuery = query.NewMatchQuery("Ivan")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
countryQuery = query.NewMatchQuery("Canada")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
req = buildReq([]query.Query{empQuery, locQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc3" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 8: Find companies where Frank the Manager works in Engineering department located in London, UK
empNameQuery = query.NewMatchQuery("Frank")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
deptNameQuery = query.NewMatchQuery("Engineering")
deptNameQuery.SetField("company.departments.name")
deptQuery = query.NewConjunctionQuery([]query.Query{empQuery, deptNameQuery})
countryQuery = query.NewMatchQuery("UK")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
req = buildReq([]query.Query{deptQuery, locQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc2" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 9: Match_All query must return only top-level documents
matchAllQuery := query.NewMatchAllQuery()
req = buildReq([]query.Query{matchAllQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 3 {
t.Fatalf("expected 3 hits, got %d", len(res.Hits))
}
// Test 10: DocID query must return only top-level documents
docIDQuery := query.NewDocIDQuery([]string{"doc1", "doc2", "doc3", "doc2_$company.locations_$0", "doc3_$company.departments_$0_$company.departments.employees_$0"})
req = buildReq([]query.Query{docIDQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 3 {
t.Fatalf("expected 3 hits, got %d", len(res.Hits))
}
// Test 11: Boolean query in Filter-only mode must return correct top-level documents
empNameQuery = query.NewMatchQuery("Frank")
empNameQuery.SetField("company.departments.employees.name")
boolQuery := query.NewBooleanQuery(nil, nil, nil)
boolQuery.AddFilter(empNameQuery)
req = buildReq([]query.Query{boolQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 2 {
t.Fatalf("expected 2 hits, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc2" || res.Hits[1].ID != "doc3" {
t.Fatalf("unexpected hit IDs: %v, %v", res.Hits[0].ID, res.Hits[1].ID)
}
// Test 12: Boolean query Must clause should work in nested context
empNameQuery = query.NewMatchQuery("Ivan")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
countryQuery = query.NewMatchQuery("Canada")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
boolQuery = query.NewBooleanQuery([]query.Query{empQuery, locQuery}, nil, nil)
req = buildReq([]query.Query{boolQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc3" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
// Test 13: Queries targeting _all field should:
// - match only top-level fields when no specific field is set
// - not match nested fields when no specific field is set
// - work correctly when combined with nested field queries,
// returning only top-level documents where both conditions are met
allRootFieldQuery := query.NewMatchQuery("TechCorp")
req = buildReq([]query.Query{allRootFieldQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc1" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
allNestedFieldQuery := query.NewMatchQuery("Alice")
req = buildReq([]query.Query{allNestedFieldQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hits, got %d", len(res.Hits))
}
allMixedQuery := buildReq([]query.Query{allRootFieldQuery, allNestedFieldQuery})
res, err = idx.Search(allMixedQuery)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hits, got %d", len(res.Hits))
}
nestedFieldQuery := query.NewMatchQuery("Alice")
nestedFieldQuery.SetField("company.departments.employees.name")
allMixedQueryWithNested := buildReq([]query.Query{allRootFieldQuery, nestedFieldQuery})
res, err = idx.Search(allMixedQueryWithNested)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc1" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
empNameQuery = query.NewMatchQuery("Frank")
empNameQuery.SetField("company.departments.employees.name")
empRoleQuery = query.NewMatchQuery("Manager")
empRoleQuery.SetField("company.departments.employees.role")
empQuery = query.NewConjunctionQuery([]query.Query{empNameQuery, empRoleQuery})
deptNameQuery = query.NewMatchQuery("Engineering")
deptNameQuery.SetField("company.departments.name")
deptQuery = query.NewConjunctionQuery([]query.Query{empQuery, deptNameQuery})
countryQuery = query.NewMatchQuery("UK")
countryQuery.SetField("company.locations.country")
cityQuery = query.NewMatchQuery("London")
cityQuery.SetField("company.locations.city")
locQuery = query.NewConjunctionQuery([]query.Query{countryQuery, cityQuery})
// mixed queries with _all field and _id field should match at root level always
companyNameAllQuery := query.NewMatchQuery("BizInc")
matchAllQuery = query.NewMatchAllQuery()
req = buildReq([]query.Query{deptQuery, locQuery, companyNameAllQuery, matchAllQuery})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 1 {
t.Fatalf("expected 1 hit, got %d", len(res.Hits))
}
if res.Hits[0].ID != "doc2" {
t.Fatalf("unexpected hit ID: %v", res.Hits[0].ID)
}
companyNameAllQueryNoMatch := query.NewMatchQuery("WebSolutions")
req = buildReq([]query.Query{deptQuery, locQuery, companyNameAllQueryNoMatch})
res, err = idx.Search(req)
if err != nil {
t.Fatalf("search failed: %v", err)
}
if len(res.Hits) != 0 {
t.Fatalf("expected 0 hits, got %d", len(res.Hits))
}
}
// TestNestedArrayConjunctionQuery indexes the same four people under a nested
// "groups" mapping twice - document "1" as an array of arrays, document "2" as
// a flat array - and verifies that a conjunction over groups.first_name and
// groups.last_name only hits when both terms come from the same person.
func TestNestedArrayConjunctionQuery(t *testing.T) {
	// "groups" is a nested sub-document with two text fields that share a
	// single field mapping.
	imap := NewIndexMapping()
	groups := mapping.NewNestedDocumentMapping()
	textField := mapping.NewTextFieldMapping()
	groups.AddFieldMappingsAt("first_name", textField)
	groups.AddFieldMappingsAt("last_name", textField)
	imap.DefaultMapping.AddSubDocumentMapping("groups", groups)

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	idx, err := New(path, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err = idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	docs := []string{
		`{
"groups": [
[
{
"first_name": "Alice",
"last_name": "Smith"
},
{
"first_name": "Bob",
"last_name": "Johnson"
}
],
[
{
"first_name": "Charlie",
"last_name": "Williams"
},
{
"first_name": "Diana",
"last_name": "Brown"
}
]
]
}`,
		`{
"groups": [
{
"first_name": "Alice",
"last_name": "Smith"
},
{
"first_name": "Bob",
"last_name": "Johnson"
},
{
"first_name": "Charlie",
"last_name": "Williams"
},
{
"first_name": "Diana",
"last_name": "Brown"
}
]
}`,
	}

	// Document IDs are the 1-based position in docs ("1", "2").
	for i := range docs {
		var parsed map[string]interface{}
		if err := json.Unmarshal([]byte(docs[i]), &parsed); err != nil {
			t.Fatalf("failed to unmarshal document %d: %v", i, err)
		}
		if err := idx.Index(fmt.Sprintf("%d", i+1), parsed); err != nil {
			t.Fatalf("failed to index document %d: %v", i, err)
		}
	}

	// search runs "first_name matches firstName AND last_name matches
	// lastName", sorted by _id, and aborts the test if the search fails.
	search := func(firstName, lastName string) *SearchResult {
		byFirst := query.NewMatchQuery(firstName)
		byFirst.SetField("groups.first_name")
		byLast := query.NewMatchQuery(lastName)
		byLast.SetField("groups.last_name")

		req := NewSearchRequest(query.NewConjunctionQuery([]query.Query{byFirst, byLast}))
		req.SortBy([]string{"_id"})

		var out *SearchResult
		out, err = idx.Search(req)
		if err != nil {
			t.Fatalf("search failed: %v", err)
		}
		return out
	}

	// "Alice" and "Johnson" belong to different people: no hits.
	res := search("Alice", "Johnson")
	if n := len(res.Hits); n != 0 {
		t.Fatalf("expected 0 hits, got %d", n)
	}

	// "Bob Johnson" is one person in both documents.
	res = search("Bob", "Johnson")
	if n := len(res.Hits); n != 2 {
		t.Fatalf("expected 2 hits, got %d", n)
	}
	if first, second := res.Hits[0].ID, res.Hits[1].ID; first != "1" || second != "2" {
		t.Fatalf("unexpected hit IDs: %v, %v", first, second)
	}

	// "Alice" and "Williams" belong to different people: no hits.
	res = search("Alice", "Williams")
	if n := len(res.Hits); n != 0 {
		t.Fatalf("expected 0 hits, got %d", n)
	}

	// "Diana Brown" is one person in both documents.
	res = search("Diana", "Brown")
	if n := len(res.Hits); n != 2 {
		t.Fatalf("expected 2 hits, got %d", n)
	}
	if first, second := res.Hits[0].ID, res.Hits[1].ID; first != "1" || second != "2" {
		t.Fatalf("unexpected hit IDs: %v, %v", first, second)
	}
}
// TestValidNestedMapping checks IndexMapping.Validate against nested document
// mappings: a nested mapping used as the DefaultMapping or as a type mapping
// must be rejected, while a nested mapping attached as a sub-document of a
// regular mapping must be accepted.
func TestValidNestedMapping(t *testing.T) {
	// newNestedParent returns a regular document mapping that contains one
	// nested sub-document ("nestedField") with a single text field ("field1").
	newNestedParent := func() *mapping.DocumentMapping {
		parent := mapping.NewDocumentMapping()
		nested := mapping.NewNestedDocumentMapping()
		nested.AddFieldMappingsAt("field1", mapping.NewTextFieldMapping())
		parent.AddSubDocumentMapping("nestedField", nested)
		return parent
	}

	// A nested mapping may not be the DefaultMapping.
	imap := mapping.NewIndexMapping()
	nested := mapping.NewNestedDocumentMapping()
	imap.DefaultMapping = nested
	if err := imap.Validate(); err == nil {
		t.Fatalf("expected error for nested DefaultMapping, got nil")
	}

	// A nested mapping may not be a type mapping either.
	imap = mapping.NewIndexMapping()
	imap.AddDocumentMapping("type1", nested)
	if err := imap.Validate(); err == nil {
		t.Fatalf("expected error for nested type mapping, got nil")
	}

	// Nested sub-document below the DefaultMapping is valid.
	imap = mapping.NewIndexMapping()
	imap.DefaultMapping = newNestedParent()
	if err := imap.Validate(); err != nil {
		t.Fatalf("expected valid nested mapping, got error: %v", err)
	}

	// Nested sub-document below a type mapping is valid.
	imap = mapping.NewIndexMapping()
	imap.AddDocumentMapping("type1", newNestedParent())
	if err := imap.Validate(); err != nil {
		t.Fatalf("expected valid nested mapping, got error: %v", err)
	}

	// A mix of regular and nested type mappings is still invalid.
	imap = mapping.NewIndexMapping()
	nested = mapping.NewNestedDocumentMapping()
	regular := mapping.NewDocumentMapping()
	imap.AddDocumentMapping("non_nested1", regular)
	imap.AddDocumentMapping("non_nested2", regular)
	imap.AddDocumentMapping("nested1", nested)
	imap.AddDocumentMapping("nested2", nested)
	if err := imap.Validate(); err == nil {
		t.Fatalf("expected error for nested type mappings, got nil")
	}
}
================================================
FILE: search_no_knn.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//go:build !vectors
// +build !vectors
package bleve
import (
"context"
"encoding/json"
"sort"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/collector"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
)
// supportForVectorSearch is false here because this file is only compiled
// when the "vectors" build tag is absent (see the build constraint above).
const supportForVectorSearch = false
// A SearchRequest describes all the parameters
// needed to search the index.
//
// Query is required.
// Size/From describe how much and which part of the
// result set to return.
// Highlight describes optional search result
// highlighting.
// Fields describes a list of field values which
// should be retrieved for result documents, provided they
// were stored while indexing.
// Facets describe the set of facets to be computed.
// Explain triggers inclusion of additional search
// result score explanations.
// Sort describes the desired order for the results to be returned.
// Score controls the kind of scoring performed.
// SearchAfter supports deep paging by providing a minimum sort key.
// SearchBefore supports deep paging by providing a maximum sort key.
// Params holds the rescoring parameters; UnmarshalJSON only fills it in
// when IsScoreFusionRequested reports true for the request.
// sortFunc specifies the sort implementation to use for sorting results;
// it is unexported and therefore never part of the JSON form.
//
// A special field named "*" can be used to return all fields.
//
// NOTE(review): ClientContextID is written to JSON as "client_context_id",
// but UnmarshalJSON does not read that key, so it has to be set
// programmatically - confirm this asymmetry is intended.
// NOTE(review): IncludeLocations presumably asks for term locations to be
// included in the hits - confirm against the search implementation.
type SearchRequest struct {
ClientContextID string `json:"client_context_id,omitempty"`
Query query.Query `json:"query"`
Size int `json:"size"`
From int `json:"from"`
Highlight *HighlightRequest `json:"highlight"`
Fields []string `json:"fields"`
Facets FacetsRequest `json:"facets"`
Explain bool `json:"explain"`
Sort search.SortOrder `json:"sort"`
IncludeLocations bool `json:"includeLocations"`
Score string `json:"score,omitempty"`
SearchAfter []string `json:"search_after"`
SearchBefore []string `json:"search_before"`
// PreSearchData is a map used in the second phase of any 2-phase
// search, to provide additional context to that second phase. This is
// useful in the case of index aliases, where the first phase gathers the
// PreSearchData from all the indexes in the alias, and the second phase
// uses that PreSearchData to perform the actual search.
// The currently accepted map configuration is:
//
// "_knn_pre_search_data_key": []*search.DocumentMatch
PreSearchData map[string]interface{} `json:"pre_search_data,omitempty"`
Params *RequestParams `json:"params,omitempty"`
sortFunc func(sort.Interface)
}
// UnmarshalJSON populates r from the JSON encoding of a search request.
//
// A missing "size" defaults to 10 and a missing "sort" defaults to descending
// score order; negative "size" and "from" values are reset to 10 and 0
// respectively. The "params" object is only consulted when
// IsScoreFusionRequested reports true for the decoded request, in which case
// an absent "params" yields NewDefaultParams(From, Size).
//
// Fields of r are assigned as decoding progresses, so r may be left partially
// updated when a non-nil error is returned.
func (r *SearchRequest) UnmarshalJSON(input []byte) error {
	// Wire representation: query, sort, pre_search_data and params stay raw
	// so they can be handed to their dedicated parsers; size is a pointer so
	// that "absent" can be told apart from an explicit value.
	var wire struct {
		Q                json.RawMessage   `json:"query"`
		Size             *int              `json:"size"`
		From             int               `json:"from"`
		Highlight        *HighlightRequest `json:"highlight"`
		Fields           []string          `json:"fields"`
		Facets           FacetsRequest     `json:"facets"`
		Explain          bool              `json:"explain"`
		Sort             []json.RawMessage `json:"sort"`
		IncludeLocations bool              `json:"includeLocations"`
		Score            string            `json:"score"`
		SearchAfter      []string          `json:"search_after"`
		SearchBefore     []string          `json:"search_before"`
		PreSearchData    json.RawMessage   `json:"pre_search_data"`
		Params           json.RawMessage   `json:"params"`
	}
	if err := json.Unmarshal(input, &wire); err != nil {
		return err
	}

	var err error

	// Page size: 10 unless the request says otherwise.
	r.Size = 10
	if wire.Size != nil {
		r.Size = *wire.Size
	}

	// Sort order: descending score unless the request says otherwise.
	if wire.Sort != nil {
		if r.Sort, err = search.ParseSortOrderJSON(wire.Sort); err != nil {
			return err
		}
	} else {
		r.Sort = search.SortOrder{&search.SortScore{Desc: true}}
	}

	// Fields that are copied over verbatim.
	r.From = wire.From
	r.Score = wire.Score
	r.Explain = wire.Explain
	r.IncludeLocations = wire.IncludeLocations
	r.Fields = wire.Fields
	r.Facets = wire.Facets
	r.Highlight = wire.Highlight
	r.SearchAfter = wire.SearchAfter
	r.SearchBefore = wire.SearchBefore

	if r.Query, err = query.ParseQuery(wire.Q); err != nil {
		return err
	}

	// Replace out-of-range paging values with their defaults.
	if r.Size < 0 {
		r.Size = 10
	}
	if r.From < 0 {
		r.From = 0
	}

	// Rescoring parameters only apply to requests that ask for score fusion.
	if IsScoreFusionRequested(r) {
		if wire.Params != nil {
			parsed, err := ParseParams(r, wire.Params)
			if err != nil {
				return err
			}
			r.Params = parsed
		} else {
			// No explicit params: fall back to the defaults derived from
			// the (already sanitized) paging values.
			r.Params = NewDefaultParams(r.From, r.Size)
		}
	}

	if wire.PreSearchData != nil {
		if r.PreSearchData, err = query.ParsePreSearchData(wire.PreSearchData); err != nil {
			return err
		}
	}

	return nil
}
// -----------------------------------------------------------------------------
// copySearchRequest derives a new request from req.
//
// The copy always starts at offset 0 and asks for req.Size+req.From results.
// Its sort order is an independent copy (req.Sort.Copy()), and its
// PreSearchData is the preSearchData argument rather than req's own. All other
// listed fields are shared with req. ClientContextID, Params and sortFunc are
// not carried over and keep their zero values.
func copySearchRequest(req *SearchRequest, preSearchData map[string]interface{}) *SearchRequest {
	return &SearchRequest{
		// What to search for and how to score/order it.
		Query: req.Query,
		Score: req.Score,
		Sort:  req.Sort.Copy(),

		// Paging: fetch everything up to the end of the requested page.
		From:         0,
		Size:         req.Size + req.From,
		SearchAfter:  req.SearchAfter,
		SearchBefore: req.SearchBefore,

		// What to return for each hit.
		Fields:           req.Fields,
		Facets:           req.Facets,
		Highlight:        req.Highlight,
		Explain:          req.Explain,
		IncludeLocations: req.IncludeLocations,

		PreSearchData: preSearchData,
	}
}
// validateKNN is the variant used when the package is built without the
// "vectors" tag: it performs no checks and always returns nil.
func validateKNN(req *SearchRequest) error {
return nil
}
// runKnnCollector is the variant used when the package is built without the
// "vectors" tag: it ignores its arguments and returns no hits and no error.
func (i *indexImpl) runKnnCollector(ctx context.Context, req *SearchRequest, reader index.IndexReader, preSearch bool) ([]*search.DocumentMatch, error) {
return nil, nil
}
// setKnnHitsInCollector is a no-op when the package is built without the
// "vectors" tag; the collector is left untouched.
func setKnnHitsInCollector(knnHits []*search.DocumentMatch, coll *collector.TopNCollector) {
}
// requestHasKNN always reports false when the package is built without the
// "vectors" tag.
func requestHasKNN(req *SearchRequest) bool {
return false
}
// numKNNQueries always returns 0 when the package is built without the
// "vectors" tag.
func numKNNQueries(req *SearchRequest) int {
return 0
}
// addKnnToDummyRequest is a no-op when the package is built without the
// "vectors" tag; neither request is modified.
func addKnnToDummyRequest(dummyReq *SearchRequest, realReq *SearchRequest) {
}
// validateAndDistributeKNNHits is the variant used when the package is built
// without the "vectors" tag: it ignores its arguments and returns a nil map
// and a nil error.
func validateAndDistributeKNNHits(knnHits []*search.DocumentMatch, indexes []Index) (map[string][]*search.DocumentMatch, error) {
return nil, nil
}
// isKNNrequestSatisfiedByPreSearch always reports false when the package is
// built without the "vectors" tag.
func isKNNrequestSatisfiedByPreSearch(req *SearchRequest) bool {
return false
}
// constructKnnPreSearchData is the variant used when the package is built
// without the "vectors" tag: it returns mergedOut unchanged together with a
// nil error, ignoring preSearchResult and indexes.
func constructKnnPreSearchData(mergedOut map[string]map[string]interface{}, preSearchResult *SearchResult,
indexes []Index) (map[string]map[string]interface{}, error) {
return mergedOut, nil
}
// finalizeKNNResults is the variant used when the package is built without
// the "vectors" tag: it returns knnHits exactly as received.
func finalizeKNNResults(req *SearchRequest, knnHits []*search.DocumentMatch) []*search.DocumentMatch {
return knnHits
}
// newKnnPreSearchResultProcessor is the variant used when the package is
// built without the "vectors" tag: it ignores req and returns a pointer to a
// zero-value knnPreSearchResultProcessor.
// NOTE(review): the inline remark says this behaves like nil; that depends on
// how knnPreSearchResultProcessor treats its zero value, which is defined
// elsewhere - confirm.
func newKnnPreSearchResultProcessor(req *SearchRequest) *knnPreSearchResultProcessor {
return &knnPreSearchResultProcessor{} // equivalent to nil
}
// prepareKnnRequest is a no-op when the package is built without the
// "vectors" tag.
func (r *rescorer) prepareKnnRequest() {
}
// restoreKnnRequest is a no-op when the package is built without the
// "vectors" tag.
func (r *rescorer) restoreKnnRequest() {
}
================================================
FILE: search_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package bleve
import (
"context"
"encoding/json"
"fmt"
"math"
"math/rand"
"os"
"reflect"
"sort"
"strconv"
"strings"
"testing"
"time"
"github.com/blevesearch/bleve/v2/analysis"
"github.com/blevesearch/bleve/v2/analysis/analyzer/custom"
"github.com/blevesearch/bleve/v2/analysis/analyzer/keyword"
"github.com/blevesearch/bleve/v2/analysis/analyzer/simple"
"github.com/blevesearch/bleve/v2/analysis/analyzer/standard"
html_char_filter "github.com/blevesearch/bleve/v2/analysis/char/html"
regexp_char_filter "github.com/blevesearch/bleve/v2/analysis/char/regexp"
"github.com/blevesearch/bleve/v2/analysis/datetime/flexible"
"github.com/blevesearch/bleve/v2/analysis/datetime/iso"
"github.com/blevesearch/bleve/v2/analysis/datetime/percent"
"github.com/blevesearch/bleve/v2/analysis/datetime/sanitized"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/microseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/milliseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/nanoseconds"
"github.com/blevesearch/bleve/v2/analysis/datetime/timestamp/seconds"
"github.com/blevesearch/bleve/v2/analysis/lang/en"
"github.com/blevesearch/bleve/v2/analysis/token/length"
"github.com/blevesearch/bleve/v2/analysis/token/lowercase"
"github.com/blevesearch/bleve/v2/analysis/token/shingle"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/single"
"github.com/blevesearch/bleve/v2/analysis/tokenizer/whitespace"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/geo"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/ansi"
"github.com/blevesearch/bleve/v2/search/highlight/highlighter/html"
"github.com/blevesearch/bleve/v2/search/query"
index "github.com/blevesearch/bleve_index_api"
)
// TestSortedFacetedQuery indexes three documents that share one country but
// differ in content, runs a phrase query sorted by "content" with a terms
// facet on that same field, and verifies the per-term facet counts.
func TestSortedFacetedQuery(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	im := NewIndexMapping()
	im.TypeField = "type"
	im.DefaultAnalyzer = "en"

	hotel := NewDocumentMapping()
	im.AddDocumentMapping("hotel", hotel)

	// One indexed, doc-values-enabled text mapping is attached to both fields.
	textField := NewTextFieldMapping()
	textField.Index = true
	textField.DocValues = true
	for _, field := range []string{"content", "country"} {
		hotel.AddFieldMappingsAt(field, textField)
	}

	idx, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// Documents are indexed in ID order; only the content value varies.
	docs := []struct{ id, content string }{
		{"1", "k"},
		{"2", "l"},
		{"3", "k"},
	}
	for _, doc := range docs {
		fields := map[string]interface{}{
			"country": "india",
			"content": doc.content,
		}
		if err := idx.Index(doc.id, fields); err != nil {
			t.Fatal(err)
		}
	}

	count, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if count != 3 {
		t.Errorf("expected 3, got %d", count)
	}

	phrase := NewMatchPhraseQuery("india")
	phrase.SetField("country")

	req := NewSearchRequest(phrase)
	req.SortBy([]string{"content"})
	req.AddFacet("content_facet", NewFacetRequest("content", 100))

	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// Every facet term returned must carry the count recorded here.
	wantCounts := map[string]int{"k": 2, "l": 1}
	for _, facet := range res.Facets {
		for _, term := range facet.Terms.Terms() {
			if want := wantCounts[term.Term]; term.Count != want {
				t.Errorf("expected %d, got %d", want, term.Count)
			}
		}
	}
}
// TestMatchAllScorer runs a match-all query with scoring disabled
// (Score = "none") over three documents and checks that all of them are
// returned with a zero score.
func TestMatchAllScorer(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	im := NewIndexMapping()
	im.TypeField = "type"
	im.DefaultAnalyzer = "en"

	// NOTE(review): this document mapping is built but never registered on
	// the index mapping within this function — confirm that is intended.
	dm := NewDocumentMapping()
	contentField := NewTextFieldMapping()
	contentField.Index = true
	contentField.Store = true
	dm.AddFieldMappingsAt("content", contentField)

	idx, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	docs := []struct{ id, content string }{
		{"1", "k"},
		{"2", "l"},
		{"3", "k"},
	}
	for _, doc := range docs {
		fields := map[string]interface{}{
			"country": "india",
			"content": doc.content,
		}
		if err := idx.Index(doc.id, fields); err != nil {
			t.Fatal(err)
		}
	}

	count, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if count != 3 {
		t.Errorf("expected 3, got %d", count)
	}

	req := NewSearchRequest(NewMatchAllQuery())
	req.Score = "none"

	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 3 {
		t.Fatalf("expected all the 3 docs in the index, got %v", res.Total)
	}
	for _, hit := range res.Hits {
		if hit.Score != 0.0 {
			t.Fatalf("expected 0 score since score = none, got %v", hit.Score)
		}
	}
}
func TestSearchResultString(t *testing.T) {
tests := []struct {
result *SearchResult
str string
}{
{
result: &SearchResult{
Request: &SearchRequest{
Size: 10,
},
Total: 5,
Took: 1 * time.Second,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{},
&search.DocumentMatch{},
&search.DocumentMatch{},
&search.DocumentMatch{},
&search.DocumentMatch{},
},
},
str: "5 matches, showing 1 through 5, took 1s",
},
{
result: &SearchResult{
Request: &SearchRequest{
Size: 0,
},
Total: 5,
Hits: search.DocumentMatchCollection{},
},
str: "5 matches",
},
{
result: &SearchResult{
Request: &SearchRequest{
Size: 10,
},
Total: 0,
Hits: search.DocumentMatchCollection{},
},
str: "No matches",
},
// no search request
{
result: &SearchResult{
Total: 3,
Took: 500 * time.Millisecond,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{},
&search.DocumentMatch{},
&search.DocumentMatch{},
},
},
str: "3 matches, took 500ms",
},
}
for _, test := range tests {
srstring := test.result.String()
if !strings.HasPrefix(srstring, test.str) {
t.Errorf("expected to start %s, got %s", test.str, srstring)
}
}
}
func TestSearchResultMerge(t *testing.T) {
l := &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
MaxScore: 1,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "a",
Score: 1,
},
},
}
r := &SearchResult{
Status: &SearchStatus{
Total: 1,
Successful: 1,
Errors: make(map[string]error),
},
Total: 1,
MaxScore: 2,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "b",
Score: 2,
},
},
}
expected := &SearchResult{
Status: &SearchStatus{
Total: 2,
Successful: 2,
Errors: make(map[string]error),
},
Total: 2,
MaxScore: 2,
Hits: search.DocumentMatchCollection{
&search.DocumentMatch{
ID: "a",
Score: 1,
},
&search.DocumentMatch{
ID: "b",
Score: 2,
},
},
}
l.Merge(r)
if !reflect.DeepEqual(l, expected) {
t.Errorf("expected %#v, got %#v", expected, l)
}
}
// TestUnmarshalingSearchResult decodes a JSON search response whose status
// carries one per-index error string and verifies that the error ends up in
// SearchResult.Status.Errors.
func TestUnmarshalingSearchResult(t *testing.T) {
	searchResponse := []byte(`{
"status":{
"total":1,
"failed":1,
"successful":0,
"errors":{
"default_index_362ce020b3d62b13_348f5c3c":"context deadline exceeded"
}
},
"request":{
"query":{
"match":"emp",
"field":"type",
"boost":1,
"prefix_length":0,
"fuzziness":0
},
"size":10000000,
"from":0,
"highlight":null,
"fields":[],
"facets":null,
"explain":false
},
"hits":null,
"total_hits":0,
"max_score":0,
"took":0,
"facets":null
}`)

	// The target is pre-populated with a non-nil status and error map.
	rv := &SearchResult{
		Status: &SearchStatus{
			Errors: make(map[string]error),
		},
	}

	// err is scoped to this statement; the function declares no other err
	// variable that a plain assignment could refer to.
	if err := json.Unmarshal(searchResponse, rv); err != nil {
		t.Error(err)
	}
	if len(rv.Status.Errors) != 1 {
		t.Errorf("expected 1 error, got %d", len(rv.Status.Errors))
	}
}
// TestFacetNumericDateRangeRequests runs FacetRequest.Validate over a table
// of well-formed and malformed numeric/date range facets and compares the
// returned error (or nil) against the expected one with reflect.DeepEqual.
func TestFacetNumericDateRangeRequests(t *testing.T) {
	// Errors expected from Validate for the failing cases below.
	var (
		errDateMissing = fmt.Errorf("date range query must specify either start, end or both for range name 'testName'")
		errNumMissing  = fmt.Errorf("numeric range query must specify either min, max or both for range name 'testName'")
		errMixedRanges = fmt.Errorf("facet can only contain numeric ranges or date ranges, not both")
		errDateDupName = fmt.Errorf("date ranges contains duplicate name 'testName'")
		errNumDupName  = fmt.Errorf("numeric ranges contains duplicate name 'testName'")
	)

	bound := float64(5)
	epoch := time.Unix(0, 0)

	cases := []struct {
		facet  *FacetRequest
		result error
	}{
		// Valid date ranges: both bounds, start only, end only.
		{
			facet: &FacetRequest{
				Field: "Date_Range_Success_With_StartEnd",
				Size:  1,
				DateTimeRanges: []*dateTimeRange{
					{Name: "testName", Start: epoch, End: time.Now()},
				},
			},
			result: nil,
		},
		{
			facet: &FacetRequest{
				Field: "Date_Range_Success_With_Start",
				Size:  1,
				DateTimeRanges: []*dateTimeRange{
					{Name: "testName", Start: epoch},
				},
			},
			result: nil,
		},
		{
			facet: &FacetRequest{
				Field: "Date_Range_Success_With_End",
				Size:  1,
				DateTimeRanges: []*dateTimeRange{
					{Name: "testName", End: time.Now()},
				},
			},
			result: nil,
		},
		// Valid numeric ranges: both bounds, min only, max only.
		{
			facet: &FacetRequest{
				Field: "Numeric_Range_Success_With_MinMax",
				Size:  1,
				NumericRanges: []*numericRange{
					{Name: "testName", Min: &bound, Max: &bound},
				},
			},
			result: nil,
		},
		{
			facet: &FacetRequest{
				Field: "Numeric_Range_Success_With_Min",
				Size:  1,
				NumericRanges: []*numericRange{
					{Name: "testName", Min: &bound},
				},
			},
			result: nil,
		},
		{
			facet: &FacetRequest{
				Field: "Numeric_Range_Success_With_Max",
				Size:  1,
				NumericRanges: []*numericRange{
					{Name: "testName", Max: &bound},
				},
			},
			result: nil,
		},
		// A range with neither bound set is rejected.
		{
			facet: &FacetRequest{
				Field: "Date_Range_Missing_Failure",
				Size:  1,
				DateTimeRanges: []*dateTimeRange{
					{Name: "testName2", Start: epoch},
					{Name: "testName1", End: time.Now()},
					{Name: "testName"},
				},
			},
			result: errDateMissing,
		},
		{
			facet: &FacetRequest{
				Field: "Numeric_Range_Missing_Failure",
				Size:  1,
				NumericRanges: []*numericRange{
					{Name: "testName2", Min: &bound},
					{Name: "testName1", Max: &bound},
					{Name: "testName"},
				},
			},
			result: errNumMissing,
		},
		// Mixing numeric and date ranges in one facet is rejected.
		{
			facet: &FacetRequest{
				Field: "Numeric_And_DateRanges_Failure",
				Size:  1,
				NumericRanges: []*numericRange{
					{Name: "testName", Max: &bound},
				},
				DateTimeRanges: []*dateTimeRange{
					{Name: "testName", End: time.Now()},
				},
			},
			result: errMixedRanges,
		},
		// Duplicate range names are rejected.
		{
			facet: &FacetRequest{
				Field: "Numeric_Range_Name_Repeat_Failure",
				Size:  1,
				NumericRanges: []*numericRange{
					{Name: "testName", Min: &bound},
					{Name: "testName", Max: &bound},
				},
			},
			result: errNumDupName,
		},
		{
			facet: &FacetRequest{
				Field: "Date_Range_Name_Repeat_Failure",
				Size:  1,
				DateTimeRanges: []*dateTimeRange{
					{Name: "testName", Start: epoch},
					{Name: "testName", End: time.Now()},
				},
			},
			result: errDateDupName,
		},
	}

	for _, tc := range cases {
		if got := tc.facet.Validate(); !reflect.DeepEqual(got, tc.result) {
			t.Errorf("expected %#v, got %#v", tc.result, got)
		}
	}
}
// TestSearchResultFacetsMerge merges a result that carries date-range facets
// into one that has none and expects the facets to be carried over while
// the status counters and totals are summed.
func TestSearchResultFacetsMerge(t *testing.T) {
	// Boundaries shared between adjacent date ranges.
	var (
		lowMed   = "2010-01-01"
		medHi    = "2011-01-01"
		hiHigher = "2012-01-01"
	)

	birthday := &search.FacetResult{
		Field:   "birthday",
		Total:   100,
		Missing: 25,
		Other:   25,
		DateRanges: []*search.DateRangeFacet{
			{Name: "low", End: &lowMed, Count: 25},
			{Name: "med", Count: 24, Start: &lowMed, End: &medHi},
			{Name: "hi", Count: 1, Start: &medHi, End: &hiHigher},
		},
	}
	facets := search.FacetResults{"birthdays": birthday}

	left := &SearchResult{
		Status: &SearchStatus{
			Total:      10,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Total:    10,
		MaxScore: 1,
	}
	right := &SearchResult{
		Status: &SearchStatus{
			Total:      1,
			Successful: 1,
			Errors:     make(map[string]error),
		},
		Total:    1,
		MaxScore: 2,
		Facets:   facets,
	}
	want := &SearchResult{
		Status: &SearchStatus{
			Total:      11,
			Successful: 2,
			Errors:     make(map[string]error),
		},
		Total:    11,
		MaxScore: 2,
		Facets:   facets,
	}

	left.Merge(right)
	if !reflect.DeepEqual(left, want) {
		t.Errorf("expected %#v, got %#v", want, left)
	}
}
func TestMemoryNeededForSearchResult(t *testing.T) {
query := NewTermQuery("blah")
req := NewSearchRequest(query)
var sr SearchResult
expect := sr.Size()
var dm search.DocumentMatch
expect += 10 * dm.Size()
estimate := MemoryNeededForSearchResult(req)
if estimate != uint64(expect) {
t.Errorf("estimate not what is expected: %v != %v", estimate, expect)
}
}
// TestNestedBooleanSearchers covers
// https://github.com/blevesearch/bleve/issues/954: a conjunction of two
// boolean "must" clauses, the first of which wraps a disjunction, must return
// exactly the documents whose hostname and region both match.
func TestNestedBooleanSearchers(t *testing.T) {
	// Index mapping whose default analyzer is a custom whitespace/lowercase/
	// English-stop-word chain.
	im := NewIndexMapping()
	analyzerConfig := map[string]interface{}{
		"type":          custom.Name,
		"tokenizer":     whitespace.Name,
		"token_filters": []interface{}{lowercase.Name, "stop_en"},
	}
	if err := im.AddCustomAnalyzer("3xbla", analyzerConfig); err != nil {
		t.Fatal(err)
	}
	im.DefaultAnalyzer = "3xbla"

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	idx, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// Build 100 documents in a single batch, counting along the way how many
	// of them the query below is expected to return.
	b := idx.NewBatch()
	wantHits := 0
	for i := 0; i < 100; i++ {
		hostname := fmt.Sprintf("planner_hostname_%d", i%5)
		metadata := map[string]string{"region": fmt.Sprintf("planner_us-east-%d", i%5)}

		hostMatches := hostname == "planner_hostname_1" || hostname == "planner_hostname_2"
		if hostMatches && metadata["region"] == "planner_us-east-1" {
			wantHits++
		}

		// The hostname is indexed as one lowercased token.
		hostAnalyzer := &analysis.DefaultAnalyzer{
			Tokenizer: single.NewSingleTokenTokenizer(),
			TokenFilters: []analysis.TokenFilter{
				lowercase.NewLowerCaseFilter(),
			},
		}
		doc := document.NewDocument(strconv.Itoa(i))
		doc.Fields = []document.Field{
			document.NewTextFieldCustom("hostname", []uint64{}, []byte(hostname),
				index.IndexField, hostAnalyzer),
		}
		for key, val := range metadata {
			name := fmt.Sprintf("metadata.%s", key)
			doc.AddField(document.NewTextFieldWithIndexingOptions(
				name, []uint64{}, []byte(val), index.IndexField))
		}
		doc.CompositeFields = []*document.CompositeField{
			document.NewCompositeFieldWithIndexingOptions(
				"_all", true, []string{"text"}, []string{},
				index.IndexField|index.IncludeTermVectors),
		}

		if err = b.IndexAdvanced(doc); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	rawQuery := []byte(
		`{
"conjuncts": [
{
"must": {
"conjuncts": [
{
"disjuncts": [
{
"match": "planner_hostname_1",
"field": "hostname"
},
{
"match": "planner_hostname_2",
"field": "hostname"
}
]
}
]
}
},
{
"must": {
"conjuncts": [
{
"match": "planner_us-east-1",
"field": "metadata.region"
}
]
}
}
]
}`,
	)
	parsed, err := query.ParseQuery(rawQuery)
	if err != nil {
		t.Fatal(err)
	}

	req := NewSearchRequest(parsed)
	req.Size = 100
	req.Fields = []string{"hostname", "metadata.region"}

	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if gotHits := len(res.Hits); wantHits != gotHits {
		t.Fatalf("Unexpected result set, %v != %v", wantHits, gotHits)
	}
}
// TestNestedBooleanMustNotSearcherUpsidedown indexes documents where every
// member of investigation "1" has hasRole=true, then searches for
// investigation "1" AND (should: must-not hasRole=true) and expects no hits.
func TestNestedBooleanMustNotSearcherUpsidedown(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	// Index with default settings.
	idx, err := New(path, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// Fixture rows: document ID, role flag, investigation ID.
	fixtures := []struct {
		id              string
		hasRole         bool
		investigationId string
	}{
		{id: "1@1", hasRole: true, investigationId: "1"},
		{id: "1@2", hasRole: false, investigationId: "2"},
		{id: "2@1", hasRole: true, investigationId: "1"},
		{id: "2@2", hasRole: false, investigationId: "2"},
		{id: "3@1", hasRole: true, investigationId: "1"},
		{id: "3@2", hasRole: false, investigationId: "2"},
		{id: "4@1", hasRole: true, investigationId: "1"},
		{id: "5@1", hasRole: true, investigationId: "1"},
		{id: "6@1", hasRole: true, investigationId: "1"},
		{id: "7@1", hasRole: true, investigationId: "1"},
	}

	// Insert all fixtures as one batch.
	b := idx.NewBatch()
	for _, fx := range fixtures {
		doc := document.NewDocument(fx.id)
		doc.Fields = []document.Field{
			document.NewTextField("id", []uint64{}, []byte(fx.id)),
			document.NewBooleanField("hasRole", []uint64{}, fx.hasRole),
			document.NewTextField("investigationId", []uint64{}, []byte(fx.investigationId)),
		}
		doc.CompositeFields = []*document.CompositeField{
			document.NewCompositeFieldWithIndexingOptions(
				"_all", true, []string{"text"}, []string{},
				index.IndexField|index.IncludeTermVectors),
		}
		if err = b.IndexAdvanced(doc); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	investigation := NewTermQuery("1")
	investigation.SetField("investigationId")

	// must-not is used so that documents lacking the field entirely would
	// also count as "no role".
	withRole := NewBoolFieldQuery(true)
	withRole.SetField("hasRole")
	withoutRole := NewBooleanQuery()
	withoutRole.AddMustNot(withRole)

	roleFilter := NewBooleanQuery()
	roleFilter.AddShould(withoutRole)
	roleFilter.SetMinShould(1)

	req := NewSearchRequestOptions(NewConjunctionQuery(investigation, roleFilter), 100, 0, false)
	req.Fields = []string{"hasRole"}
	req.Highlight = NewHighlight()

	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 0 {
		t.Fatalf("Unexpected result, %v != 0", res.Total)
	}
}
// TestSearchScorchOverEmptyKeyword indexes ten documents on a scorch index
// where the keyword-analyzed "id" field is always the empty string, and
// checks that a wildcard query still finds all ten documents.
func TestSearchScorchOverEmptyKeyword(t *testing.T) {
	// Override the package-wide default index type for this test. The
	// restore gets its own defer, registered immediately, so it runs even
	// when a later step (index creation, Close) aborts via t.Fatal.
	defaultIndexType := Config.DefaultIndexType
	Config.DefaultIndexType = scorch.Name
	defer func() {
		Config.DefaultIndexType = defaultIndexType
	}()

	dmap := mapping.NewDocumentMapping()
	dmap.DefaultAnalyzer = standard.Name

	// "id" uses the keyword analyzer, "name" the standard analyzer.
	fm := mapping.NewTextFieldMapping()
	fm.Analyzer = keyword.Name
	fm1 := mapping.NewTextFieldMapping()
	fm1.Analyzer = standard.Name
	dmap.AddFieldMappingsAt("id", fm)
	dmap.AddFieldMappingsAt("name", fm1)

	imap := mapping.NewIndexMapping()
	imap.DefaultMapping = dmap
	imap.DefaultAnalyzer = standard.Name

	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	for i := 0; i < 10; i++ {
		err = idx.Index(fmt.Sprint(i), map[string]string{"name": fmt.Sprintf("test%d", i), "id": ""})
		if err != nil {
			t.Fatal(err)
		}
	}

	count, err := idx.DocCount()
	if err != nil {
		t.Fatal(err)
	}
	if count != 10 {
		t.Fatalf("Unexpected doc count: %v, expected 10", count)
	}

	q := query.NewWildcardQuery("test*")
	sr := NewSearchRequestOptions(q, 40, 0, false)
	res, err := idx.Search(sr)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 10 {
		t.Fatalf("Unexpected search hits: %v, expected 10", res.Total)
	}
}
// TestMultipleNestedBooleanMustNotSearchersOnScorch indexes five documents on
// a scorch index, re-indexes the first one in a second batch, and then runs a
// boolean query with two nested must-not clauses: (must-not type == 9) AND
// (must-not hasRole == true). Exactly one document is expected to match.
func TestMultipleNestedBooleanMustNotSearchersOnScorch(t *testing.T) {
	// Override the package-wide default index type for this test. The
	// restore gets its own defer, registered immediately, so it runs even
	// when a later step (index creation, Close) aborts via t.Fatal.
	defaultIndexType := Config.DefaultIndexType
	Config.DefaultIndexType = scorch.Name
	defer func() {
		Config.DefaultIndexType = defaultIndexType
	}()

	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	// create an index with default settings
	idxMapping := NewIndexMapping()
	idx, err := New(tmpIndexPath, idxMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// newDoc builds a document with the given fields plus the "_all"
	// composite field that every document in this test carries.
	newDoc := func(id string, fields ...document.Field) *document.Document {
		doc := document.NewDocument(id)
		doc.Fields = fields
		doc.CompositeFields = []*document.CompositeField{
			document.NewCompositeFieldWithIndexingOptions(
				"_all", true, []string{"text"}, []string{},
				index.IndexField|index.IncludeTermVectors),
		}
		return doc
	}

	// First batch: the "1-child-0" document (with a roles field) followed by
	// four documents of type 9.
	batch := idx.NewBatch()
	first := newDoc("1-child-0",
		document.NewTextField("id", []uint64{}, []byte("1-child-0")),
		document.NewBooleanField("hasRole", []uint64{}, false),
		document.NewTextField("roles", []uint64{}, []byte("R1")),
		document.NewNumericField("type", []uint64{}, 0),
	)
	if err = batch.IndexAdvanced(first); err != nil {
		t.Fatal(err)
	}

	docs := []struct {
		id      string
		hasRole bool
		typ     int
	}{
		{
			id:      "16d6fa37-48fd-4dea-8b3d-a52bddf73951",
			hasRole: false,
			typ:     9,
		},
		{
			id:      "18fa9eb2-8b1f-46f0-8b56-b4c551213f78",
			hasRole: false,
			typ:     9,
		},
		{
			id:      "3085855b-d74b-474a-86c3-9bf3e4504382",
			hasRole: false,
			typ:     9,
		},
		{
			id:      "38ef5d28-0f85-4fb0-8a94-dd20751c3364",
			hasRole: false,
			typ:     9,
		},
	}
	for _, d := range docs {
		doc := newDoc(d.id,
			document.NewTextField("id", []uint64{}, []byte(d.id)),
			document.NewBooleanField("hasRole", []uint64{}, d.hasRole),
			document.NewNumericField("type", []uint64{}, float64(d.typ)),
		)
		if err = batch.IndexAdvanced(doc); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// Second batch: update the first document, this time without "roles".
	batch = idx.NewBatch()
	updated := newDoc("1-child-0",
		document.NewTextField("id", []uint64{}, []byte("1-child-0")),
		document.NewBooleanField("hasRole", []uint64{}, false),
		document.NewNumericField("type", []uint64{}, 0),
	)
	if err = batch.IndexAdvanced(updated); err != nil {
		t.Fatal(err)
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// First nested clause: must-not (type in [9, 9], both ends inclusive).
	inclusive := true
	val := float64(9)
	q := query.NewNumericRangeInclusiveQuery(&val, &val, &inclusive, &inclusive)
	q.SetField("type")
	initialQuery := query.NewBooleanQuery(nil, nil, []query.Query{q})

	// Second nested clause: must-not hasRole == true, so that documents
	// lacking the field entirely also qualify.
	hasRole := NewBoolFieldQuery(true)
	hasRole.SetField("hasRole")
	noRole := NewBooleanQuery()
	noRole.AddMustNot(hasRole)

	rq := query.NewBooleanQuery([]query.Query{initialQuery, noRole}, nil, nil)
	sr := NewSearchRequestOptions(rq, 100, 0, false)
	sr.Fields = []string{"id", "hasRole", "type"}
	sr.Highlight = NewHighlight()

	res, err := idx.Search(sr)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 1 {
		t.Fatalf("Unexpected result, %v != 1", res.Total)
	}
}
// testBooleanMustNotSearcher builds an index of the given type, indexes five
// documents (two of them with HasRole set) and verifies that every hit of
// the conjunction (doc-ID query AND must-not HasRole) is also a hit of each
// operand searched on its own.
func testBooleanMustNotSearcher(t *testing.T, indexName string) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	idx, err := NewUsing(path, NewIndexMapping(), indexName, Config.DefaultKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	// Each struct value is handed to Index as the document body, keyed by
	// its Name.
	docs := []struct {
		Name    string
		HasRole bool
	}{
		{Name: "13900"},
		{Name: "13901"},
		{Name: "13965"},
		{Name: "13966", HasRole: true},
		{Name: "13967", HasRole: true},
	}
	for _, doc := range docs {
		if err := idx.Index(doc.Name, doc); err != nil {
			t.Fatal(err)
		}
	}

	lhs := NewDocIDQuery([]string{"13965", "13966", "13967"})

	withRole := NewBoolFieldQuery(true)
	withRole.SetField("HasRole")
	rhs := NewBooleanQuery()
	rhs.AddMustNot(withRole)

	// hitIDs collects the IDs of all hits in res into a set.
	hitIDs := func(res *SearchResult) map[string]struct{} {
		ids := map[string]struct{}{}
		for _, hit := range res.Hits {
			ids[hit.ID] = struct{}{}
		}
		return ids
	}

	// checkConjunction searches left, right and their conjunction (in that
	// order) and reports the first conjunction hit missing from an operand.
	checkConjunction := func(target Index, left, right query.Query) error {
		leftRes, err := target.Search(NewSearchRequestOptions(left, 100, 0, false))
		if err != nil {
			return fmt.Errorf("error left: %v", err)
		}
		leftIDs := hitIDs(leftRes)

		rightRes, err := target.Search(NewSearchRequestOptions(right, 100, 0, false))
		if err != nil {
			return fmt.Errorf("error right: %v", err)
		}
		rightIDs := hitIDs(rightRes)

		both := NewSearchRequestOptions(NewConjunctionQuery(left, right), 100, 0, false)
		bothRes, err := target.Search(both)
		if err != nil {
			return fmt.Errorf("error conjunction: %v", err)
		}

		for _, hit := range bothRes.Hits {
			if _, inLeft := leftIDs[hit.ID]; !inLeft {
				return fmt.Errorf("error id %s missing from left", hit.ID)
			}
			if _, inRight := rightIDs[hit.ID]; !inRight {
				return fmt.Errorf("error id %s missing from right", hit.ID)
			}
		}
		return nil
	}

	if err = checkConjunction(idx, lhs, rhs); err != nil {
		t.Fatal(err)
	}
}
// TestBooleanMustNotSearcherUpsidedown runs the shared must-not searcher
// check against an index created with the upsidedown index type.
func TestBooleanMustNotSearcherUpsidedown(t *testing.T) {
	testBooleanMustNotSearcher(t, upsidedown.Name)
}
// TestBooleanMustNotSearcherScorch runs the shared must-not searcher check
// against an index created with the scorch index type.
func TestBooleanMustNotSearcherScorch(t *testing.T) {
	testBooleanMustNotSearcher(t, scorch.Name)
}
// TestQueryStringEmptyConjunctionSearcher executes a query-string query that
// contains a required clause with an empty quoted phrase against an empty
// in-memory index. The search outcome is deliberately discarded.
// NOTE(review): presumably a regression test that the search merely does not
// panic — confirm.
func TestQueryStringEmptyConjunctionSearcher(t *testing.T) {
	im := NewIndexMapping()
	im.DefaultAnalyzer = keyword.Name

	idx, err := NewMemOnly(im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		// Close errors are intentionally ignored here.
		_ = idx.Close()
	}()

	req := NewSearchRequest(NewQueryStringQuery("foo:bar +baz:\"\""))
	// Neither the result nor the error is inspected.
	_, _ = idx.Search(req)
}
// TestDisjunctionQueryIncorrectMin searches with a disjunction that has a
// single clause but a minimum of two required matches, and expects the
// search to succeed with zero hits.
func TestDisjunctionQueryIncorrectMin(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	// Index with default settings.
	idx, err := New(path, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	fixtures := []struct {
		field1 string
		field2 int
	}{
		{field1: "one", field2: 1},
		{field1: "two", field2: 2},
	}

	// Insert the fixtures as one batch; field2 doubles as the document ID.
	b := idx.NewBatch()
	for _, fx := range fixtures {
		doc := document.NewDocument(strconv.Itoa(fx.field2))
		doc.Fields = []document.Field{
			document.NewTextField("field1", []uint64{}, []byte(fx.field1)),
			document.NewNumericField("field2", []uint64{}, float64(fx.field2)),
		}
		doc.CompositeFields = []*document.CompositeField{
			document.NewCompositeFieldWithIndexingOptions(
				"_all", true, []string{"text"}, []string{},
				index.IndexField|index.IncludeTermVectors),
		}
		if err = b.IndexAdvanced(doc); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	// One clause, but two matching clauses required.
	disjunction := NewDisjunctionQuery(NewTermQuery("one"))
	disjunction.SetMin(2)

	res, err := idx.Search(NewSearchRequestOptions(disjunction, 1, 0, false))
	if err != nil {
		t.Fatal(err)
	}
	if res.Total > 0 {
		t.Fatalf("Expected 0 matches as disjunction query contains a single clause"+
			" but got: %v", res.Total)
	}
}
// TestMatchQueryPartialMatch verifies the PartialMatch flag on hit
// explanations for match queries over two documents, covering full/partial
// combinations, a fuzzy query, and the AND operator.
func TestMatchQueryPartialMatch(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	doc1 := map[string]interface{}{
		"description": "Patrick is first name Stewart is last name",
	}
	doc2 := map[string]interface{}{
		"description": "Manager given name is Patrick",
	}
	batch := idx.NewBatch()
	if err = batch.Index("doc1", doc1); err != nil {
		t.Fatal(err)
	}
	if err = batch.Index("doc2", doc2); err != nil {
		t.Fatal(err)
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// runExplained executes q with explanations enabled and aborts the test
	// if the search itself fails.
	runExplained := func(q query.Query) *SearchResult {
		t.Helper()
		req := NewSearchRequest(q)
		req.Explain = true
		res, err := idx.Search(req)
		if err != nil {
			t.Fatal(err)
		}
		return res
	}

	// expectMatches checks that res holds exactly the documents named in
	// wantPartial and that each hit's PartialMatch flag equals the mapped
	// value (true = partial match, false = full match).
	expectMatches := func(scenario string, res *SearchResult, wantPartial map[string]bool) {
		t.Helper()
		if int(res.Total) != len(wantPartial) {
			t.Errorf("%s: Expected %d results, but got: %v", scenario, len(wantPartial), res.Total)
		}
		for _, hit := range res.Hits {
			partial, known := wantPartial[hit.ID]
			switch {
			case !known:
				t.Errorf("%s: Unexpected document ID: %s", scenario, hit.ID)
			case partial && !hit.Expl.PartialMatch:
				t.Errorf("%s: Expected %s to be a partial match", scenario, hit.ID)
			case !partial && hit.Expl.PartialMatch:
				t.Errorf("%s: Expected %s to be a full match", scenario, hit.ID)
			}
		}
	}

	// Test 1 - Both Docs hit, doc 1 = Full Match and doc 2 = Partial Match
	mq1 := NewMatchQuery("patrick stewart")
	mq1.SetField("description")
	expectMatches("Test 1", runExplained(mq1), map[string]bool{
		"doc1": false,
		"doc2": true,
	})

	// Test 2 - Both Docs hit, doc 1 = Partial Match and doc 2 = Full Match
	mq2 := NewMatchQuery("paltric manner")
	mq2.SetField("description")
	mq2.SetFuzziness(2)
	expectMatches("Test 2", runExplained(mq2), map[string]bool{
		"doc1": true,
		"doc2": false,
	})

	// Test 3 - Two Docs hits, both full match
	mq3 := NewMatchQuery("patrick")
	mq3.SetField("description")
	expectMatches("Test 3", runExplained(mq3), map[string]bool{
		"doc1": false,
		"doc2": false,
	})

	// Test 4 - Two Docs hits, both partial match
	mq4 := NewMatchQuery("patrick stewart manager")
	mq4.SetField("description")
	expectMatches("Test 4", runExplained(mq4), map[string]bool{
		"doc1": true,
		"doc2": true,
	})

	// Test 5 - Match Query AND operator always results in full match
	mq5 := NewMatchQuery("patrick stewart")
	mq5.SetField("description")
	// NOTE(review): 1 is taken to select the AND operator, per the scenario
	// description above — confirm against the query package's named constant.
	mq5.SetOperator(1)
	expectMatches("Test 5", runExplained(mq5), map[string]bool{
		"doc1": false,
	})
}
// TestBooleanShouldMinPropagation combines a "should" match on dept with a
// "must" match on name and expects both documents to be returned, i.e. the
// should clause must not restrict what the must clause matches.
func TestBooleanShouldMinPropagation(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	idx, err := New(path, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	fixtures := []struct {
		id   string
		body map[string]interface{}
	}{
		{"doc1", map[string]interface{}{
			"dept": "queen",
			"name": "cersei lannister",
		}},
		{"doc2", map[string]interface{}{
			"dept": "kings guard",
			"name": "jaime lannister",
		}},
	}
	b := idx.NewBatch()
	for _, fx := range fixtures {
		if err = b.Index(fx.id, fx.body); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	// term dictionaries in the index for field..
	//  dept: queen kings guard
	//  name: cersei jaime lannister

	// Matches doc2 only.
	deptMatch := NewMatchQuery("kings guard")
	deptMatch.SetField("dept")

	// Matches doc1 and doc2, since both share the last name.
	nameMatch := NewMatchQuery("jaime lannister")
	nameMatch.SetField("name")

	boolQuery := NewBooleanQuery()
	boolQuery.AddShould(deptMatch)
	boolQuery.AddMust(nameMatch)

	res, err := idx.Search(NewSearchRequest(boolQuery))
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 2 {
		t.Errorf("Expected 2 results, but got: %v", res.Total)
	}
}
// TestDisjunctionMinPropagation wraps a two-clause disjunction requiring
// three matches inside an outer disjunction requiring one, and expects no
// hits: the inner minimum cannot be met, so the outer one is not either.
func TestDisjunctionMinPropagation(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	idx, err := New(path, NewIndexMapping())
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	fixtures := []struct {
		id   string
		body map[string]interface{}
	}{
		{"doc1", map[string]interface{}{
			"dept": "finance",
			"name": "xyz",
		}},
		{"doc2", map[string]interface{}{
			"dept": "marketing",
			"name": "xyz",
		}},
		{"doc3", map[string]interface{}{
			"dept": "engineering",
			"name": "abc",
		}},
	}
	b := idx.NewBatch()
	for _, fx := range fixtures {
		if err = b.Index(fx.id, fx.body); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	// Inner disjunction: two clauses, minimum of three.
	inner := NewDisjunctionQuery(NewMatchQuery("finance"), NewMatchQuery("marketing"))
	inner.SetMin(3)

	// Outer disjunction: the inner query as its only clause, minimum of one.
	outer := NewDisjunctionQuery(inner)
	outer.SetMin(1)

	res, err := idx.Search(NewSearchRequest(outer))
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 0 {
		t.Fatalf("Expect 0 results, but got: %v", res.Total)
	}
}
// TestDuplicateLocationsIssue1168 searches with a disjunction of two
// identical term queries and checks that the matched term is reported with
// exactly one location rather than one per clause.
func TestDuplicateLocationsIssue1168(t *testing.T) {
	// The "name" property is indexed under the field name "name1" using the
	// keyword analyzer.
	fm1 := NewTextFieldMapping()
	fm1.Analyzer = keyword.Name
	fm1.Name = "name1"

	dm := NewDocumentStaticMapping()
	dm.AddFieldMappingsAt("name", fm1)

	m := NewIndexMapping()
	m.DefaultMapping = dm

	idx, err := NewMemOnly(m)
	if err != nil {
		t.Fatalf("bleve new err: %v", err)
	}
	// Release the in-memory index when the test finishes.
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatalf("bleve close err: %v", cerr)
		}
	}()

	err = idx.Index("x", map[string]interface{}{
		"name": "marty",
	})
	if err != nil {
		t.Fatalf("bleve index err: %v", err)
	}

	q1 := NewTermQuery("marty")
	q2 := NewTermQuery("marty")
	dq := NewDisjunctionQuery(q1, q2)

	sreq := NewSearchRequest(dq)
	sreq.Fields = []string{"*"}
	sreq.Highlight = NewHighlightWithStyle(html.Name)

	sres, err := idx.Search(sreq)
	if err != nil {
		t.Fatalf("bleve search err: %v", err)
	}
	// Guard the index below so an empty result fails cleanly, not by panic.
	if len(sres.Hits) == 0 {
		t.Fatalf("expected at least one hit, got none")
	}
	if len(sres.Hits[0].Locations["name1"]["marty"]) != 1 {
		t.Fatalf("duplicate marty")
	}
}
// TestBooleanMustSingleMatchNone uses an analyzer with a length token filter
// (min 3, max 5) so that the "must" clause term is filtered out, and expects
// the boolean query to return nothing even though the "should" clause
// matches.
func TestBooleanMustSingleMatchNone(t *testing.T) {
	im := NewIndexMapping()

	lengthConfig := map[string]interface{}{
		"min":  3.0,
		"max":  5.0,
		"type": length.Name,
	}
	if err := im.AddCustomTokenFilter(length.Name, lengthConfig); err != nil {
		t.Fatal(err)
	}

	analyzerConfig := map[string]interface{}{
		"type":          "custom",
		"tokenizer":     "single",
		"token_filters": []interface{}{length.Name},
	}
	if err := im.AddCustomAnalyzer("custom1", analyzerConfig); err != nil {
		t.Fatal(err)
	}
	im.DefaultAnalyzer = "custom1"

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	idx, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if cerr := idx.Close(); cerr != nil {
			t.Fatal(cerr)
		}
	}()

	b := idx.NewBatch()
	body := map[string]interface{}{
		"languages_known": "Dutch",
		"dept":            "Sales",
	}
	if err = b.Index("doc", body); err != nil {
		t.Fatal(err)
	}
	if err = idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	// This clause matches the indexed document.
	salesMatch := NewMatchQuery("Sales")
	salesMatch.SetField("dept")

	// This clause ends up as a MatchNoneSearcher, because the token filter
	// rules out the word "French".
	frenchMatch := NewMatchQuery("French")
	frenchMatch.SetField("languages_known")

	boolQuery := NewBooleanQuery()
	boolQuery.AddShould(salesMatch)
	boolQuery.AddMust(frenchMatch)

	res, err := idx.Search(NewSearchRequest(boolQuery))
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 0 {
		t.Fatalf("Expected 0 results but got: %v", res.Total)
	}
}
// TestBooleanMustNotSingleMatchNone runs a boolean query with one SHOULD
// clause and two MUST NOT clauses against a single indexed document and
// expects zero hits. The index uses a custom analyzer built from the
// "unicode" tokenizer and a shingle token filter configured with min 3 and
// max 5.
func TestBooleanMustNotSingleMatchNone(t *testing.T) {
	im := NewIndexMapping()

	shingleFilterCfg := map[string]interface{}{
		"min":  3.0,
		"max":  5.0,
		"type": shingle.Name,
	}
	if err := im.AddCustomTokenFilter(shingle.Name, shingleFilterCfg); err != nil {
		t.Fatal(err)
	}

	analyzerCfg := map[string]interface{}{
		"type":          "custom",
		"tokenizer":     "unicode",
		"token_filters": []interface{}{shingle.Name},
	}
	if err := im.AddCustomAnalyzer("custom1", analyzerCfg); err != nil {
		t.Fatal(err)
	}
	im.DefaultAnalyzer = "custom1"

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	index, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	b := index.NewBatch()
	err = b.Index("doc", map[string]interface{}{
		"languages_known": "Dutch",
		"dept":            "Sales",
	})
	if err != nil {
		t.Fatal(err)
	}
	if err = index.Batch(b); err != nil {
		t.Fatal(err)
	}

	// SHOULD clause on the "dept" field.
	salesQ := NewMatchQuery("Sales")
	salesQ.SetField("dept")

	// First MUST NOT clause: per the original author's note, the token
	// filter rules out the word "Dutch", so this turns into a
	// MatchNoneSearcher.
	dutchQ := NewMatchQuery("Dutch")
	dutchQ.SetField("languages_known")

	// Second MUST NOT clause on the "dept" field.
	engineeringQ := NewMatchQuery("Engineering")
	engineeringQ.SetField("dept")

	boolQ := NewBooleanQuery()
	boolQ.AddShould(salesQ)
	boolQ.AddMustNot(dutchQ, engineeringQ)

	result, err := index.Search(NewSearchRequest(boolQ))
	if err != nil {
		t.Fatal(err)
	}
	if result.Total != 0 {
		t.Fatalf("Expected 0 results but got: %v", result.Total)
	}
}
// TestBooleanSearchBug1185 indexes nine documents one at a time into a
// scorch index and then checks that two differently-shaped but logically
// equivalent queries return the same number of hits:
//
//  1. a conjunction of the "type" phrase match with NOT(NOT(owner =~ ".+"))
//  2. a boolean query with the phrase match as MUST and NOT(owner =~ ".+")
//     as MUST NOT
func TestBooleanSearchBug1185(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	ownerField := NewTextFieldMapping()
	ownerField.Analyzer = keyword.Name
	ownerField.Name = "owner"

	docMapping := NewDocumentMapping()
	docMapping.AddFieldMappingsAt("owner", ownerField)

	im := NewIndexMapping()
	im.DefaultMapping = docMapping

	index, err := NewUsing(path, im, "scorch", "scorch", nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Documents are indexed individually and in this exact order.
	docs := []struct {
		id   string
		body map[string]interface{}
	}{
		{id: "17112", body: map[string]interface{}{"owner": "marty", "type": "A Demo Type"}},
		{id: "17139", body: map[string]interface{}{"type": "A Demo Type"}},
		{id: "177777", body: map[string]interface{}{"type": "x"}},
		{id: "177778", body: map[string]interface{}{"type": "A Demo Type"}},
		{id: "17140", body: map[string]interface{}{"type": "A Demo Type"}},
		{id: "17000", body: map[string]interface{}{"owner": "marty", "type": "x"}},
		{id: "17141", body: map[string]interface{}{"type": "A Demo Type"}},
		{id: "17428", body: map[string]interface{}{"owner": "marty", "type": "A Demo Type"}},
		{id: "17113", body: map[string]interface{}{"owner": "marty", "type": "x"}},
	}
	for _, d := range docs {
		if err = index.Index(d.id, d.body); err != nil {
			t.Fatal(err)
		}
	}

	typeQ := NewMatchPhraseQuery("A Demo Type")
	typeQ.SetField("type")

	anyOwnerQ := NewRegexpQuery(".+")
	anyOwnerQ.SetField("owner")

	// NOT(owner matches ".+")
	noOwnerQ := NewBooleanQuery()
	noOwnerQ.AddMustNot(anyOwnerQ)

	// NOT(NOT(owner matches ".+"))
	notNoOwnerQ := NewBooleanQuery()
	notNoOwnerQ.AddMustNot(noOwnerQ)

	// query 1: conjunction form
	conjQ := NewConjunctionQuery()
	conjQ.AddQuery(typeQ)
	conjQ.AddQuery(notNoOwnerQ)

	conjRes, err := index.Search(NewSearchRequest(conjQ))
	if err != nil {
		t.Fatal(err)
	}

	// query 2: boolean must / must-not form
	boolQ := NewBooleanQuery()
	boolQ.AddMust(typeQ)
	boolQ.AddMustNot(noOwnerQ)

	boolRes, err := index.Search(NewSearchRequest(boolQ))
	if err != nil {
		t.Fatal(err)
	}

	if n1, n2 := len(conjRes.Hits), len(boolRes.Hits); n1 != n2 {
		t.Fatalf("expected same number of hits, got: %d and %d", n1, n2)
	}
}
// TestSearchScoreNone issues a query-string search with Score set to "none"
// and IncludeLocations enabled, then checks that the single hit carries
// exactly one location entry and a zero score.
func TestSearchScoreNone(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	index, err := NewUsing(path, NewIndexMapping(), scorch.Name, Config.DefaultKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	err = index.Index("doc", map[string]interface{}{
		"field1": "asd fgh jkl",
		"field2": "more content blah blah",
		"id":     "doc",
	})
	if err != nil {
		t.Fatal(err)
	}

	req := NewSearchRequest(NewQueryStringQuery("content"))
	req.IncludeLocations = true
	req.Score = "none"

	result, err := index.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if len(result.Hits) != 1 {
		t.Fatal("unexpected number of hits")
	}

	hit := result.Hits[0]
	if len(hit.Locations) != 1 {
		t.Fatal("unexpected locations for the hit")
	}
	if hit.Score != 0 {
		t.Fatal("unexpected score for the hit")
	}
}
// TestGeoDistanceIssue1301 indexes three documents, each with a geohash
// string in the "GEO" geopoint field and an integer "ID", and expects a 1km
// geo distance query without an explicit field to return all three.
func TestGeoDistanceIssue1301(t *testing.T) {
	docMapping := NewDocumentMapping()
	docMapping.AddFieldMappingsAt("GEO", NewGeoPointFieldMapping())

	im := NewIndexMapping()
	im.DefaultMapping = docMapping

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	index, err := NewUsing(path, im, scorch.Name, Config.DefaultKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	geohashes := []string{"wecpkbeddsmf", "wecpk8tne453", "wecpkb80s09t"}
	for n, hash := range geohashes {
		shop := map[string]interface{}{
			"ID":  n,
			"GEO": hash,
		}
		if err = index.Index(strconv.Itoa(n), shop); err != nil {
			t.Fatal(err)
		}
	}

	// Not setting "Field" for the following query targets it against the
	// _all field, and this was returning inconsistent results when another,
	// numeric, field is indexed along with the geopoint.
	// As reported in: https://github.com/blevesearch/bleve/issues/1301
	lat, lon := 22.371154, 114.112603
	result, err := index.Search(NewSearchRequest(NewGeoDistanceQuery(lon, lat, "1km")))
	if err != nil {
		t.Fatal(err)
	}
	if result.Total != 3 {
		t.Fatalf("Size expected: 3, actual %d\n", result.Total)
	}
}
// TestSearchHighlightingWithRegexpReplacement searches with highlighting
// enabled on an index whose default analyzer applies a regexp char filter
// with a replacement pattern, and expects the search to succeed with exactly
// one hit.
func TestSearchHighlightingWithRegexpReplacement(t *testing.T) {
	im := NewIndexMapping()

	charFilterCfg := map[string]interface{}{
		"regexp":  `([a-z])\s+(\d)`,
		"replace": "ooooo$1-$2",
		"type":    regexp_char_filter.Name,
	}
	if err := im.AddCustomCharFilter(regexp_char_filter.Name, charFilterCfg); err != nil {
		t.Fatal(err)
	}

	analyzerCfg := map[string]interface{}{
		"type":         custom.Name,
		"tokenizer":    "unicode",
		"char_filters": []string{regexp_char_filter.Name},
	}
	if err := im.AddCustomAnalyzer("regexp_replace", analyzerCfg); err != nil {
		t.Fatal(err)
	}

	im.DefaultAnalyzer = "regexp_replace"
	im.StoreDynamic = true

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	index, err := NewUsing(path, im, scorch.Name, Config.DefaultKVStore, nil)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	b := index.NewBatch()
	if err = b.Index("doc", map[string]interface{}{"status": "fool 10"}); err != nil {
		t.Fatal(err)
	}
	if err = index.Batch(b); err != nil {
		t.Fatal(err)
	}

	req := NewSearchRequest(NewMatchQuery("fool 10"))
	req.Fields = []string{"*"}
	req.Highlight = NewHighlightWithStyle(ansi.Name)

	result, err := index.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if result.Total != 1 {
		t.Fatalf("Expected 1 hit, got: %v", result.Total)
	}
}
// TestAnalyzerInheritance verifies that a text field with no analyzer of its
// own picks up the "keyword" analyzer from the nearest enclosing mapping that
// declares a default analyzer. Each case builds an index from a JSON mapping,
// indexes one document and expects an exact-value term query on the field to
// return that document.
func TestAnalyzerInheritance(t *testing.T) {
	tests := []struct {
		name       string
		mappingStr string
		doc        map[string]interface{}
		queryField string
		queryTerm  string
	}{
		{
			/*
				index_mapping: keyword
				default_mapping: ""
					-> child field (should inherit keyword)
			*/
			name: "Child field to inherit index mapping's default analyzer",
			mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` +
				`{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` +
				`"store":false,"index":true}]}}},"default_analyzer":"keyword"}`,
			doc:        map[string]interface{}{"city": "San Francisco"},
			queryField: "city",
			queryTerm:  "San Francisco",
		},
		{
			/*
				index_mapping: standard
				default_mapping: keyword
					-> child field (should inherit keyword)
			*/
			name: "Child field to inherit default mapping's default analyzer",
			mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` +
				`{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` +
				`"index":true}]}},"default_analyzer":"keyword"},"default_analyzer":"standard"}`,
			doc:        map[string]interface{}{"city": "San Francisco"},
			queryField: "city",
			queryTerm:  "San Francisco",
		},
		{
			/*
				index_mapping: standard
				default_mapping: keyword (dynamic)
					-> search over dynamically mapped field (should inherit keyword)
			*/
			// Named differently from the previous case so the two subtests
			// can be told apart in the test output.
			name: "Dynamic field to inherit default mapping's default analyzer",
			mappingStr: `{"default_mapping":{"enabled":true,"dynamic":true,"default_analyzer":"keyword"}` +
				`,"default_analyzer":"standard"}`,
			doc:        map[string]interface{}{"city": "San Francisco"},
			queryField: "city",
			queryTerm:  "San Francisco",
		},
		{
			/*
				index_mapping: standard
				default_mapping: keyword
					-> child mapping: ""
						-> child field: (should inherit keyword)
			*/
			name: "Nested child field to inherit default mapping's default analyzer",
			mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"default_analyzer":` +
				`"keyword","properties":{"address":{"enabled":true,"dynamic":false,"properties":` +
				`{"city":{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` +
				`"index":true}]}}}}},"default_analyzer":"standard"}`,
			doc: map[string]interface{}{
				"address": map[string]interface{}{"city": "San Francisco"},
			},
			queryField: "address.city",
			queryTerm:  "San Francisco",
		},
		{
			/*
				index_mapping: standard
				default_mapping: ""
					-> child mapping: "keyword"
						-> child mapping: ""
							-> child field: (should inherit keyword)
			*/
			name: "Nested child field to inherit first child mapping's default analyzer",
			mappingStr: `{"default_mapping":{"enabled":true,"dynamic":false,"properties":` +
				`{"address":{"enabled":true,"dynamic":false,"default_analyzer":"keyword",` +
				`"properties":{"state":{"enabled":true,"dynamic":false,"properties":{"city":` +
				`{"enabled":true,"dynamic":false,"fields":[{"name":"city","type":"text",` +
				`"store":false,"index":true}]}}}}}}},"default_analyzer":"standard"}`,
			doc: map[string]interface{}{
				"address": map[string]interface{}{
					"state": map[string]interface{}{"city": "San Francisco"},
				},
			},
			queryField: "address.state.city",
			queryTerm:  "San Francisco",
		},
	}

	for i := range tests {
		tc := tests[i]
		t.Run(tc.name, func(t *testing.T) {
			idxMapping := NewIndexMapping()
			if err := idxMapping.UnmarshalJSON([]byte(tc.mappingStr)); err != nil {
				t.Fatal(err)
			}

			tmpIndexPath := createTmpIndexPath(t)
			// Registered before the Close defer so that, in LIFO order, the
			// index is closed first and its directory removed afterwards.
			defer cleanupTmpIndexPath(t, tmpIndexPath)

			idx, err := New(tmpIndexPath, idxMapping)
			if err != nil {
				t.Fatal(err)
			}
			defer func() {
				if err := idx.Close(); err != nil {
					t.Fatal(err)
				}
			}()

			if err = idx.Index("doc", tc.doc); err != nil {
				t.Fatal(err)
			}

			q := NewTermQuery(tc.queryTerm)
			q.SetField(tc.queryField)

			res, err := idx.Search(NewSearchRequest(q))
			if err != nil {
				t.Fatal(err)
			}
			if len(res.Hits) != 1 {
				t.Errorf("Unexpected number of hits: %v", len(res.Hits))
			}
		})
	}
}
func TestHightlightingWithHTMLCharacterFilter(t *testing.T) {
idxMapping := NewIndexMapping()
if err := idxMapping.AddCustomAnalyzer("custom-html", map[string]interface{}{
"type": custom.Name,
"tokenizer": "unicode",
"char_filters": []interface{}{html_char_filter.Name},
}); err != nil {
t.Fatal(err)
}
fm := mapping.NewTextFieldMapping()
fm.Analyzer = "custom-html"
dmap := mapping.NewDocumentMapping()
dmap.AddFieldMappingsAt("content", fm)
idxMapping.DefaultMapping = dmap
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
idx, err := New(tmpIndexPath, idxMapping)
if err != nil {
t.Fatal(err)
}
defer func() {
err = idx.Close()
if err != nil {
t.Fatal(err)
}
}()
content := " Welcome to blevesearch.
"
if err = idx.Index("doc", map[string]string{
"content": content,
}); err != nil {
t.Fatal(err)
}
searchStr := "blevesearch"
q := query.NewMatchQuery(searchStr)
q.SetField("content")
sr := NewSearchRequest(q)
sr.IncludeLocations = true
sr.Fields = []string{"*"}
sr.Highlight = NewHighlightWithStyle(html.Name)
searchResults, err := idx.Search(sr)
if err != nil {
t.Fatal(err)
}
if len(searchResults.Hits) != 1 ||
len(searchResults.Hits[0].Locations["content"][searchStr]) != 1 {
t.Fatalf("Expected 1 hit with 1 location")
}
expectedLocation := &search.Location{
Pos: 3,
Start: uint64(strings.Index(content, searchStr)),
End: uint64(strings.Index(content, searchStr) + len(searchStr)),
}
expectedFragment := "<div> Welcome to blevesearch. </div>"
gotLocation := searchResults.Hits[0].Locations["content"]["blevesearch"][0]
gotFragment := searchResults.Hits[0].Fragments["content"][0]
if !reflect.DeepEqual(expectedLocation, gotLocation) {
t.Fatalf("Mismatch in locations, got: %v, expected: %v",
gotLocation, expectedLocation)
}
if expectedFragment != gotFragment {
t.Fatalf("Mismatch in fragment, got: %v, expected: %v",
gotFragment, expectedFragment)
}
}
func TestIPRangeQuery(t *testing.T) {
idxMapping := NewIndexMapping()
im := NewIPFieldMapping()
dmap := mapping.NewDocumentMapping()
dmap.AddFieldMappingsAt("ip_content", im)
idxMapping.DefaultMapping = dmap
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
idx, err := New(tmpIndexPath, idxMapping)
if err != nil {
t.Fatal(err)
}
defer func() {
err = idx.Close()
if err != nil {
t.Fatal(err)
}
}()
ipContent := "192.168.10.11"
if err = idx.Index("doc", map[string]string{
"ip_content": ipContent,
}); err != nil {
t.Fatal(err)
}
q := query.NewIPRangeQuery("192.168.10.0/24")
q.SetField("ip_content")
sr := NewSearchRequest(q)
searchResults, err := idx.Search(sr)
if err != nil {
t.Fatal(err)
}
if len(searchResults.Hits) != 1 ||
searchResults.Hits[0].ID != "doc" {
t.Fatal("Expected the 1 result - doc")
}
}
// TestGeoShapePolygonContainsPoint indexes two GeoJSON polygons under the
// "geometry" geoshape field and runs point queries with the "contains"
// relation, checking both the number of hits and that every expected
// document ID is among them.
func TestGeoShapePolygonContainsPoint(t *testing.T) {
	docMapping := mapping.NewDocumentMapping()
	docMapping.AddFieldMappingsAt("geometry", mapping.NewGeoShapeFieldMapping())

	im := NewIndexMapping()
	im.DefaultMapping = docMapping

	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	index, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Polygon coordinates to be ordered in counter-clock-wise order
	// for the outer loop, and holes to follow clock-wise order.
	// See: https://www.rfc-editor.org/rfc/rfc7946.html#section-3.1.6
	firstPolygon := []byte(`{
"geometry":{
"type":"Polygon",
"coordinates":[[
[4.8089,46.9307],
[4.8223,46.8915],
[4.8149,46.886],
[4.8252,46.8647],
[4.8305,46.8531],
[4.8506,46.8509],
[4.8574,46.8621],
[4.8576,46.8769],
[4.8753,46.8774],
[4.8909,46.8519],
[4.8837,46.8485],
[4.9014,46.8318],
[4.9067,46.8179],
[4.8986,46.8122],
[4.9081,46.7969],
[4.9535,46.8254],
[4.9577,46.8053],
[5.0201,46.821],
[5.0357,46.8207],
[5.0656,46.8434],
[5.0955,46.8411],
[5.1149,46.8435],
[5.1259,46.8395],
[5.1433,46.8463],
[5.1415,46.8589],
[5.1533,46.873],
[5.138,46.8843],
[5.1525,46.9012],
[5.1485,46.9165],
[5.1582,46.926],
[5.1882,46.9251],
[5.2039,46.9129],
[5.2223,46.9175],
[5.2168,46.926],
[5.2338,46.9316],
[5.228,46.9505],
[5.2078,46.9722],
[5.2117,46.98],
[5.1961,46.9783],
[5.1663,46.9638],
[5.1213,46.9634],
[5.1086,46.9596],
[5.0729,46.9604],
[5.0731,46.9668],
[5.0493,46.9817],
[5.0034,46.9722],
[4.9852,46.9585],
[4.9479,46.9664],
[4.8943,46.9663],
[4.8937,46.951],
[4.8534,46.9458],
[4.8089,46.9307]
]]
}
}`)
	secondPolygon := []byte(`{
"geometry":{
"type":"Polygon",
"coordinates":[[
[2.2266,48.7816],
[2.2266,48.7761],
[2.2288,48.7745],
[2.2717,48.7905],
[2.2799,48.8109],
[2.3013,48.8251],
[2.2894,48.8283],
[2.2726,48.8144],
[2.2518,48.8164],
[2.255,48.8101],
[2.2348,48.7954],
[2.2266,48.7816]
]]
}
}`)

	// Decode and index each polygon in turn, stopping at the first failure.
	polygons := []struct {
		id  string
		raw []byte
	}{
		{id: "doc1", raw: firstPolygon},
		{id: "doc2", raw: secondPolygon},
	}
	for _, p := range polygons {
		var decoded map[string]interface{}
		if err = json.Unmarshal(p.raw, &decoded); err != nil {
			t.Fatal(err)
		}
		if err = index.Index(p.id, decoded); err != nil {
			t.Fatal(err)
		}
	}

	cases := []struct {
		coordinates []float64
		expectHits  []string
	}{
		{
			coordinates: []float64{5, 46.9},
			expectHits:  []string{"doc1"},
		},
		{
			coordinates: []float64{1.5, 48.2},
		},
	}

	for n, tc := range cases {
		caseNum := n + 1

		shapeQ, err := NewGeoShapeQuery(
			[][][][]float64{{{tc.coordinates}}},
			geo.PointType,
			"contains",
		)
		if err != nil {
			t.Fatalf("test: %d, query err: %v", caseNum, err)
		}
		shapeQ.SetField("geometry")

		result, err := index.Search(NewSearchRequest(shapeQ))
		if err != nil {
			t.Fatalf("test: %d, search err: %v", caseNum, err)
		}

		if len(result.Hits) != len(tc.expectHits) {
			t.Errorf("test: %d, unexpected hits: %v", caseNum, len(result.Hits))
		}

		// Collect the returned IDs once, then look up each expected ID.
		returned := make(map[string]struct{}, len(result.Hits))
		for _, hit := range result.Hits {
			returned[hit.ID] = struct{}{}
		}
		for _, want := range tc.expectHits {
			if _, ok := returned[want]; !ok {
				t.Errorf("test: %d, couldn't get: %v", caseNum, want)
			}
		}
	}
}
// TestAnalyzerInheritanceForDefaultDynamicMapping sets the keyword analyzer
// as the default analyzer of the default document mapping, indexes a
// document with a field that has no explicit mapping, and expects a match
// query for the exact mixed-case value to return it.
func TestAnalyzerInheritanceForDefaultDynamicMapping(t *testing.T) {
	path := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, path)

	im := mapping.NewIndexMapping()
	im.DefaultMapping.DefaultAnalyzer = keyword.Name

	index, err := New(path, im)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := index.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	err = index.Index("doc", map[string]interface{}{"fieldX": "AbCdEf"})
	if err != nil {
		t.Fatal(err)
	}

	// Match query to apply keyword analyzer to fieldX.
	matchQ := NewMatchQuery("AbCdEf")
	matchQ.SetField("fieldX")

	found, err := index.Search(NewSearchRequest(matchQ))
	if err != nil {
		t.Fatal(err)
	}
	if n := len(found.Hits); n != 1 {
		t.Fatalf("expected 1 hit, got %d", n)
	}
}
// TestCustomDateTimeParserLayoutValidation registers custom date time
// parsers of the "sanitized" and "flexible" types on an index mapping.
// A list of well-formed layouts must be accepted by both types. Each of the
// malformed layouts must be rejected by the sanitized type and accepted by
// the flexible type.
func TestCustomDateTimeParserLayoutValidation(t *testing.T) {
	const (
		flexibleType  = flexible.Name
		sanitizedType = sanitized.Name
	)

	im := mapping.NewIndexMapping()

	goodLayouts := []interface{}{
		// some custom layouts
		"2006-01-02 15:04:05.0000",
		"2006\\01\\02T03:04:05PM",
		"2006/01/02",
		"2006-01-02T15:04:05.999Z0700PMMST",
		"15:04:05.0000Z07:00 Monday",

		// standard layouts
		time.Layout,
		time.ANSIC,
		time.UnixDate,
		time.RubyDate,
		time.RFC822,
		time.RFC822Z,
		time.RFC850,
		time.RFC1123,
		time.RFC1123Z,
		time.RFC3339,
		time.RFC3339Nano,
		time.Kitchen,
		time.Stamp,
		time.StampMilli,
		time.StampMicro,
		time.StampNano,
		"2006-01-02 15:04:05", // time.DateTime
		"2006-01-02",          // time.DateOnly
		"15:04:05",            // time.TimeOnly

		// Corrected layouts to the incorrect ones below.
		"2006-01-02 03:04:05 -0700",
		"2006-01-02 15:04:05 -0700",
		"3:04PM",
		"2006-01-02 15:04:05.000 -0700 MST",
		"January 2 2006 3:04 PM",
		"02/Jan/06 3:04PM",
		"Mon 02 Jan 3:04:05 PM",
	}
	goodConfig := map[string]interface{}{
		"type":    sanitizedType,
		"layouts": goodLayouts,
	}

	// Correct layouts - sanitizedgo should work without errors.
	if err := im.AddCustomDateTimeParser("custDT", goodConfig); err != nil {
		t.Fatalf("expected no error, got: %v", err)
	}

	// Flexiblego should work without errors as well; the same config map is
	// reused with only its type switched.
	goodConfig["type"] = flexibleType
	if err := im.AddCustomDateTimeParser("custDT_Flexi", goodConfig); err != nil {
		t.Fatalf("expected no error, got: %v", err)
	}

	// One malformed layout per parser registration attempt.
	badLayouts := [][]interface{}{
		{"2000-03-31 01:33:51 +0300"},
		{"2006-01-02 15:04:51 +0300"},
		{"2000-03-31 01:33:05 +0300"},
		{"4:45PM"},
		{"2006-01-02 15:04:05.445 -0700 MST"},
		{"August 20 2001 8:55 AM"},
		{"28/Jul/23 12:48PM"},
		{"Tue 22 Aug 6:37:30 AM"},
	}

	// first check sanitizedgo, should throw error for each of the incorrect layouts.
	rejected := 0
	for n, layouts := range badLayouts {
		cfg := map[string]interface{}{
			"type":    sanitizedType,
			"layouts": layouts,
		}
		if err := im.AddCustomDateTimeParser(fmt.Sprintf("%d_DT", n), cfg); err != nil {
			rejected++
		}
	}
	// Expecting all layouts to be incorrect, since sanitizedgo is being used.
	if want := len(badLayouts); rejected != want {
		t.Fatalf("expected %d errors, got: %d", want, rejected)
	}

	// sanity test - flexiblego should still allow the incorrect layouts, for legacy purposes
	for n, layouts := range badLayouts {
		cfg := map[string]interface{}{
			"type":    flexibleType,
			"layouts": layouts,
		}
		if err := im.AddCustomDateTimeParser(fmt.Sprintf("%d_DT_Flexi", n), cfg); err != nil {
			t.Fatalf("expected no error, got: %v", err)
		}
	}
}
// TestDateRangeStringQuery exercises date range string queries against a
// "date" field indexed with a custom date time parser ("customDT"). It covers
// queries parsed with the default query parser and with named custom
// parsers, toggling of inclusive start/end bounds, open-ended ranges, and a
// set of error cases whose exact error message is asserted.
func TestDateRangeStringQuery(t *testing.T) {
	idxMapping := NewIndexMapping()

	err := idxMapping.AddCustomDateTimeParser("customDT", map[string]interface{}{
		"type": sanitized.Name,
		"layouts": []interface{}{
			"02/01/2006 15:04:05",
			"2006/01/02 3:04PM",
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	err = idxMapping.AddCustomDateTimeParser("queryDT", map[string]interface{}{
		"type": sanitized.Name,
		"layouts": []interface{}{
			"02/01/2006 3:04PM",
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	dtmap := NewDateTimeFieldMapping()
	dtmap.DateFormat = "customDT"
	idxMapping.DefaultMapping.AddFieldMappingsAt("date", dtmap)

	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, idxMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err = idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	documents := map[string]map[string]interface{}{
		"doc1": {
			"date": "2001/08/20 6:00PM",
		},
		"doc2": {
			"date": "20/08/2001 18:00:20",
		},
		"doc3": {
			"date": "20/08/2001 18:10:00",
		},
		"doc4": {
			"date": "2001/08/20 6:15PM",
		},
		"doc5": {
			"date": "20/08/2001 18:20:00",
		},
	}

	batch := idx.NewBatch()
	for docID, doc := range documents {
		err := batch.Index(docID, doc)
		if err != nil {
			t.Fatal(err)
		}
	}
	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	type testResult struct {
		docID    string // doc ID of the hit
		hitField string // value of the queried field returned as part of the hit
	}

	type testStruct struct {
		start          string
		end            string
		field          string
		dateTimeParser string // name of the custom date time parser to use; if empty, none is set on the query
		includeStart   bool
		includeEnd     bool
		expectedHits   []testResult // hits in the order produced by sorting on field
		err            error        // when non-nil, Search must fail with exactly this message
	}

	// The expected hit for each indexed document, shared across test cases.
	var (
		hit1 = testResult{docID: "doc1", hitField: "2001/08/20 6:00PM"}
		hit2 = testResult{docID: "doc2", hitField: "20/08/2001 18:00:20"}
		hit3 = testResult{docID: "doc3", hitField: "20/08/2001 18:10:00"}
		hit4 = testResult{docID: "doc4", hitField: "2001/08/20 6:15PM"}
		hit5 = testResult{docID: "doc5", hitField: "20/08/2001 18:20:00"}
	)

	testQueries := []testStruct{
		// test cases with RFC3339 parser and toggling includeStart and includeEnd
		{
			start:        "2001-08-20T18:00:00",
			end:          "2001-08-20T18:10:00",
			field:        "date",
			includeStart: true,
			includeEnd:   true,
			expectedHits: []testResult{hit1, hit2, hit3},
		},
		{
			start:        "2001-08-20T18:00:00",
			end:          "2001-08-20T18:10:00",
			field:        "date",
			includeStart: false,
			includeEnd:   true,
			expectedHits: []testResult{hit2, hit3},
		},
		{
			start:        "2001-08-20T18:00:00",
			end:          "2001-08-20T18:10:00",
			field:        "date",
			includeStart: false,
			includeEnd:   false,
			expectedHits: []testResult{hit2},
		},
		// test cases with custom parser and omitting start and end
		{
			start:          "20/08/2001 18:00:00",
			end:            "2001/08/20 6:10PM",
			field:          "date",
			dateTimeParser: "customDT",
			includeStart:   true,
			includeEnd:     true,
			expectedHits:   []testResult{hit1, hit2, hit3},
		},
		{
			end:            "20/08/2001 18:15:00",
			field:          "date",
			dateTimeParser: "customDT",
			includeStart:   true,
			includeEnd:     true,
			expectedHits:   []testResult{hit1, hit2, hit3, hit4},
		},
		{
			start:          "2001/08/20 6:15PM",
			field:          "date",
			dateTimeParser: "customDT",
			includeStart:   true,
			includeEnd:     true,
			expectedHits:   []testResult{hit4, hit5},
		},
		{
			start:          "20/08/2001 6:15PM",
			field:          "date",
			dateTimeParser: "queryDT",
			includeStart:   true,
			includeEnd:     true,
			expectedHits:   []testResult{hit4, hit5},
		},
		// error path test cases
		{
			field:          "date",
			dateTimeParser: "customDT",
			includeStart:   true,
			includeEnd:     true,
			err:            fmt.Errorf("date range query must specify at least one of start/end"),
		},
		{
			field:        "date",
			includeStart: true,
			includeEnd:   true,
			err:          fmt.Errorf("date range query must specify at least one of start/end"),
		},
		{
			start:          "2001-08-20T18:00:00",
			end:            "2001-08-20T18:10:00",
			field:          "date",
			dateTimeParser: "customDT",
			err:            fmt.Errorf("unable to parse datetime with any of the layouts, date time parser name: customDT"),
		},
		{
			start: "3001-08-20T18:00:00",
			end:   "2001-08-20T18:10:00",
			field: "date",
			err:   fmt.Errorf("invalid/unsupported date range, start: 3001-08-20T18:00:00"),
		},
		{
			start:          "2001/08/20 6:00PM",
			end:            "3001/08/20 6:30PM",
			field:          "date",
			dateTimeParser: "customDT",
			err:            fmt.Errorf("invalid/unsupported date range, end: 3001/08/20 6:30PM"),
		},
	}

	for _, dtq := range testQueries {
		dateQuery := NewDateRangeInclusiveStringQuery(dtq.start, dtq.end, &dtq.includeStart, &dtq.includeEnd)
		dateQuery.SetDateTimeParser(dtq.dateTimeParser)
		dateQuery.SetField(dtq.field)

		sr := NewSearchRequest(dateQuery)
		// Sort on the date field so hits come back in a deterministic order
		// that can be compared positionally with expectedHits.
		sr.SortBy([]string{dtq.field})
		sr.Fields = []string{dtq.field}

		res, err := idx.Search(sr)
		if err != nil {
			if dtq.err == nil {
				t.Fatalf("expected no error, got: %v", err)
			}
			if dtq.err.Error() != err.Error() {
				t.Fatalf("expected error: %v, got: %v", dtq.err, err)
			}
			continue
		}
		// An error case that searches successfully must fail the test; it
		// would otherwise pass because zero hits equals zero expected hits.
		if dtq.err != nil {
			t.Fatalf("expected error: %v, got none", dtq.err)
		}

		if len(res.Hits) != len(dtq.expectedHits) {
			t.Fatalf("expected %d hits, got %d", len(dtq.expectedHits), len(res.Hits))
		}
		for i, hit := range res.Hits {
			want := dtq.expectedHits[i]
			if hit.ID != want.docID {
				t.Fatalf("expected docID %s, got %s", want.docID, hit.ID)
			}
			// Checked assertion: a missing or non-string field fails the
			// test with a message instead of panicking.
			got, ok := hit.Fields[dtq.field].(string)
			if !ok {
				t.Fatalf("expected hit field %s, got %v", want.hitField, hit.Fields[dtq.field])
			}
			if got != want.hitField {
				t.Fatalf("expected hit field %s, got %s", want.hitField, got)
			}
		}
	}
}
// TestDateRangeFacetQueriesWithCustomDateTimeParser runs a match-all search
// with a single date range facet on the "date" field, varying the range
// endpoints and the date time parser used to interpret them. For successful
// cases it checks the facet's name, start, end and count; for error cases it
// asserts the exact error message returned by Search.
func TestDateRangeFacetQueriesWithCustomDateTimeParser(t *testing.T) {
	idxMapping := NewIndexMapping()

	err := idxMapping.AddCustomDateTimeParser("customDT", map[string]interface{}{
		"type": sanitized.Name,
		"layouts": []interface{}{
			"02/01/2006 15:04:05",
			"2006/01/02 3:04PM",
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	err = idxMapping.AddCustomDateTimeParser("queryDT", map[string]interface{}{
		"type": sanitized.Name,
		"layouts": []interface{}{
			"02/01/2006 3:04PM",
		},
	})
	if err != nil {
		t.Fatal(err)
	}

	dtmap := NewDateTimeFieldMapping()
	dtmap.DateFormat = "customDT"
	idxMapping.DefaultMapping.AddFieldMappingsAt("date", dtmap)

	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	idx, err := New(tmpIndexPath, idxMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		err = idx.Close()
		if err != nil {
			t.Fatal(err)
		}
	}()

	documents := map[string]map[string]interface{}{
		"doc1": {
			"date": "2001/08/20 6:00PM",
		},
		"doc2": {
			"date": "20/08/2001 18:00:20",
		},
		"doc3": {
			"date": "20/08/2001 18:10:00",
		},
		"doc4": {
			"date": "2001/08/20 6:15PM",
		},
		"doc5": {
			"date": "20/08/2001 18:20:00",
		},
	}

	batch := idx.NewBatch()
	for docID, doc := range documents {
		err := batch.Index(docID, doc)
		if err != nil {
			t.Fatal(err)
		}
	}
	err = idx.Batch(batch)
	if err != nil {
		t.Fatal(err)
	}

	query := NewMatchAllQuery()

	type testFacetResult struct {
		name  string
		start string // expected facet start; empty means no start expected
		end   string // expected facet end; empty means no end expected
		count int
		err   error // when non-nil, Search must fail with exactly this message
	}

	type testFacetRequest struct {
		name   string
		start  string // empty means the range has no start
		end    string // empty means the range has no end
		parser string
		result testFacetResult
	}

	tests := []testFacetRequest{
		{
			// Test without a query time override of the parser (use default parser)
			name:  "test",
			start: "2001-08-20 18:00:00",
			end:   "2001-08-20 18:10:00",
			result: testFacetResult{
				name:  "test",
				start: "2001-08-20T18:00:00Z",
				end:   "2001-08-20T18:10:00Z",
				count: 2,
				err:   nil,
			},
		},
		{
			name:   "test",
			start:  "20/08/2001 6:00PM",
			end:    "20/08/2001 6:10PM",
			parser: "queryDT",
			result: testFacetResult{
				name:  "test",
				start: "2001-08-20T18:00:00Z",
				end:   "2001-08-20T18:10:00Z",
				count: 2,
				err:   nil,
			},
		},
		{
			name:   "test",
			start:  "20/08/2001 15:00:00",
			end:    "2001/08/20 6:10PM",
			parser: "customDT",
			result: testFacetResult{
				name:  "test",
				start: "2001-08-20T15:00:00Z",
				end:   "2001-08-20T18:10:00Z",
				count: 2,
				err:   nil,
			},
		},
		{
			name:   "test",
			end:    "2001/08/20 6:15PM",
			parser: "customDT",
			result: testFacetResult{
				name:  "test",
				end:   "2001-08-20T18:15:00Z",
				count: 3,
				err:   nil,
			},
		},
		{
			name:   "test",
			start:  "20/08/2001 6:15PM",
			parser: "queryDT",
			result: testFacetResult{
				name:  "test",
				start: "2001-08-20T18:15:00Z",
				count: 2,
				err:   nil,
			},
		},
		// some error cases
		{
			name:   "test",
			parser: "queryDT",
			result: testFacetResult{
				name: "test",
				err:  fmt.Errorf("date range query must specify either start, end or both for date range name 'test'"),
			},
		},
		{
			// default parser is used for the query, but the start time is not in the correct format (RFC3339),
			// so it should throw an error
			name:  "test",
			start: "20/08/2001 6:15PM",
			result: testFacetResult{
				name: "test",
				err:  fmt.Errorf("ParseDates err: error parsing start date '20/08/2001 6:15PM' for date range name 'test': unable to parse datetime with any of the layouts, using date time parser named dateTimeOptional"),
			},
		},
	}

	for _, test := range tests {
		searchRequest := NewSearchRequest(query)

		fr := NewFacetRequest("date", 100)

		// An empty endpoint in the table is passed to the facet as nil.
		start := &test.start
		if test.start == "" {
			start = nil
		}
		end := &test.end
		if test.end == "" {
			end = nil
		}

		fr.AddDateTimeRangeStringWithParser(test.name, start, end, test.parser)
		searchRequest.AddFacet("dateFacet", fr)

		searchResults, err := idx.Search(searchRequest)
		if err != nil {
			if test.result.err == nil {
				t.Fatalf("Unexpected error: %v", err)
			}
			if err.Error() != test.result.err.Error() {
				t.Fatalf("Expected error %v, got %v", test.result.err, err)
			}
			continue
		}
		// An error case that searches successfully must fail the test
		// rather than fall through to the facet assertions.
		if test.result.err != nil {
			t.Fatalf("Expected error %v, got none", test.result.err)
		}
		// Without this check an empty facet map would skip every assertion
		// in the loop below.
		if len(searchResults.Facets) == 0 {
			t.Fatal("Expected facet results, got none")
		}

		for _, facetResult := range searchResults.Facets {
			if len(facetResult.DateRanges) != 1 {
				t.Fatal("Expected 1 date range facet")
			}
			result := facetResult.DateRanges[0]
			if result.Name != test.result.name {
				t.Fatalf("Expected name %s, got %s", test.result.name, result.Name)
			}
			if result.Start != nil && *result.Start != test.result.start {
				t.Fatalf("Expected start %s, got %s", test.result.start, *result.Start)
			}
			if result.End != nil && *result.End != test.result.end {
				t.Fatalf("Expected end %s, got %s", test.result.end, *result.End)
			}
			if result.Start == nil && test.result.start != "" {
				t.Fatalf("Expected start %s, got nil", test.result.start)
			}
			if result.End == nil && test.result.end != "" {
				t.Fatalf("Expected end %s, got nil", test.result.end)
			}
			if result.Count != test.result.count {
				t.Fatalf("Expected count %d, got %d", test.result.count, result.Count)
			}
		}
	}
}
// TestDateRangeTimestampQueries verifies that date range queries built from
// time.Time bounds match documents whose datetime fields are indexed with a
// custom layout parser and with the unix seconds, milliseconds, microseconds
// and nanoseconds timestamp parsers.
func TestDateRangeTimestampQueries(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	indexMapping := mapping.NewIndexMapping()

	// Register a layout-based parser as well, so that a conventionally
	// formatted date field is exercised next to the timestamp fields.
	customParser := map[string]interface{}{
		"type":    flexible.Name,
		"layouts": []interface{}{"2006/01/02 15:04:05"},
	}
	if err := indexMapping.AddCustomDateTimeParser("custDT", customParser); err != nil {
		t.Fatal(err)
	}

	// Every datetime field is bound to the parser matching its encoding.
	fieldParsers := []struct {
		field  string
		parser string
	}{
		{"date", "custDT"},
		{"seconds", seconds.Name},
		{"milliseconds", milliseconds.Name},
		{"microseconds", microseconds.Name},
		{"nanoseconds", nanoseconds.Name},
	}
	for _, fp := range fieldParsers {
		fieldMapping := mapping.NewDateTimeFieldMapping()
		fieldMapping.DateFormat = fp.parser
		indexMapping.DefaultMapping.AddFieldMappingsAt(fp.field, fieldMapping)
	}

	idx, err := New(indexDir, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	// Field values are kept as strings; the stored values returned by the
	// search are compared against these exact strings further down.
	documents := map[string]map[string]string{
		"doc1": {
			"date":         "2001/08/20 03:00:10",
			"seconds":      "998276410",
			"milliseconds": "998276410100",
			"microseconds": "998276410100300",
			"nanoseconds":  "998276410100300400",
		},
		"doc2": {
			"date":         "2001/08/20 03:00:20",
			"seconds":      "998276420",
			"milliseconds": "998276410200",
			"microseconds": "998276410100400",
			"nanoseconds":  "998276410100300500",
		},
		"doc3": {
			"date":         "2001/08/20 03:00:30",
			"seconds":      "998276430",
			"milliseconds": "998276410300",
			"microseconds": "998276410100500",
			"nanoseconds":  "998276410100300600",
		},
		"doc4": {
			"date":         "2001/08/20 03:00:40",
			"seconds":      "998276440",
			"milliseconds": "998276410400",
			"microseconds": "998276410100600",
			"nanoseconds":  "998276410100300700",
		},
		"doc5": {
			"date":         "2001/08/20 03:00:50",
			"seconds":      "998276450",
			"milliseconds": "998276410500",
			"microseconds": "998276410100700",
			"nanoseconds":  "998276410100300800",
		},
	}

	batch := idx.NewBatch()
	for id, fields := range documents {
		if err := batch.Index(id, fields); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// rangeCase describes one date range query: its bounds, the field it
	// targets and the document IDs expected back, in result order.
	type rangeCase struct {
		start        string
		end          string
		field        string
		expectedHits []string
	}
	cases := []rangeCase{
		{
			start:        "2001-08-20T03:00:05",
			end:          "2001-08-20T03:00:25",
			field:        "date",
			expectedHits: []string{"doc1", "doc2"},
		},
		{
			start:        "2001-08-20T03:00:15",
			end:          "2001-08-20T03:00:35",
			field:        "seconds",
			expectedHits: []string{"doc2", "doc3"},
		},
		{
			start:        "2001-08-20T03:00:10.150",
			end:          "2001-08-20T03:00:10.450",
			field:        "milliseconds",
			expectedHits: []string{"doc2", "doc3", "doc4"},
		},
		{
			start:        "2001-08-20T03:00:10.100450",
			end:          "2001-08-20T03:00:10.100650",
			field:        "microseconds",
			expectedHits: []string{"doc3", "doc4"},
		},
		{
			start:        "2001-08-20T03:00:10.100300550",
			end:          "2001-08-20T03:00:10.100300850",
			field:        "nanoseconds",
			expectedHits: []string{"doc3", "doc4", "doc5"},
		},
	}

	// The layout carries no fractional part; time.Parse still accepts the
	// fractional seconds that some of the bounds above include.
	const boundLayout = "2006-01-02T15:04:05"
	parseBound := func(value string) time.Time {
		parsed, parseErr := time.Parse(boundLayout, value)
		if parseErr != nil {
			t.Fatal(parseErr)
		}
		return parsed
	}

	for _, tc := range cases {
		lower := parseBound(tc.start)
		upper := parseBound(tc.end)

		rangeQuery := NewDateRangeQuery(lower, upper)
		rangeQuery.SetField(tc.field)

		request := NewSearchRequest(rangeQuery)
		// Sort on the queried field so hits can be compared positionally.
		request.SortBy([]string{tc.field})
		request.Fields = []string{"*"}

		result, err := idx.Search(request)
		if err != nil {
			t.Fatal(err)
		}
		if got, want := len(result.Hits), len(tc.expectedHits); got != want {
			t.Fatalf("expected %d hits, got %d", want, got)
		}
		for pos, hit := range result.Hits {
			if wantID := tc.expectedHits[pos]; hit.ID != wantID {
				t.Fatalf("expected docID %s, got %s", wantID, hit.ID)
			}
			// Every indexed field must come back with its original string.
			source := documents[hit.ID]
			if len(hit.Fields) != len(source) {
				t.Fatalf("expected hit %s to have %d fields, got %d", hit.ID, len(source), len(hit.Fields))
			}
			for name, value := range source {
				if hit.Fields[name] != value {
					t.Fatalf("expected field %s to be %s, got %s", name, value, hit.Fields[name])
				}
			}
		}
	}
}
func TestPercentAndIsoStyleDates(t *testing.T) {
percentName := percent.Name
isoName := iso.Name
imap := mapping.NewIndexMapping()
percentConfig := map[string]interface{}{
"type": percentName,
"layouts": []interface{}{
"%Y/%m/%d %l:%M%p", // doc 1
"%d/%m/%Y %H:%M:%S", // doc 2
"%Y-%m-%dT%H:%M:%S%z", // doc 3
"%d %B %y %l%p %Z", // doc 4
"%Y; %b %d (%a) %I:%M:%S.%N%P %z", // doc 5
},
}
isoConfig := map[string]interface{}{
"type": isoName,
"layouts": []interface{}{
"yyyy/MM/dd h:mma", // doc 1
"dd/MM/yyyy HH:mm:ss", // doc 2
"yyyy-MM-dd'T'HH:mm:ssXX", // doc 3
"dd MMMM yy ha z", // doc 4
"yyyy; MMM dd (EEE) hh:mm:ss.SSSSSaa xx", // doc 5
},
}
err := imap.AddCustomDateTimeParser("percentDate", percentConfig)
if err != nil {
t.Fatal(err)
}
err = imap.AddCustomDateTimeParser("isoDate", isoConfig)
if err != nil {
t.Fatal(err)
}
percentField := mapping.NewDateTimeFieldMapping()
percentField.DateFormat = "percentDate"
isoField := mapping.NewDateTimeFieldMapping()
isoField.DateFormat = "isoDate"
imap.DefaultMapping.AddFieldMappingsAt("percentDate", percentField)
imap.DefaultMapping.AddFieldMappingsAt("isoDate", isoField)
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
idx, err := New(tmpIndexPath, imap)
if err != nil {
t.Fatal(err)
}
defer func() {
err = idx.Close()
if err != nil {
t.Fatal(err)
}
}()
documents := map[string]map[string]interface{}{
"doc1": {
"percentDate": "2001/08/20 6:00PM",
"isoDate": "2001/08/20 6:00PM",
},
"doc2": {
"percentDate": "20/08/2001 18:05:00",
"isoDate": "20/08/2001 18:05:00",
},
"doc3": {
"percentDate": "2001-08-20T18:10:00Z",
"isoDate": "2001-08-20T18:10:00Z",
},
"doc4": {
"percentDate": "20 August 01 6PM UTC",
"isoDate": "20 August 01 6PM UTC",
},
"doc5": {
"percentDate": "2001; Aug 20 (Mon) 06:15:15.23456pm +0000",
"isoDate": "2001; Aug 20 (Mon) 06:15:15.23456pm +0000",
},
}
batch := idx.NewBatch()
for docID, doc := range documents {
err := batch.Index(docID, doc)
if err != nil {
t.Fatal(err)
}
}
err = idx.Batch(batch)
if err != nil {
t.Fatal(err)
}
type testStruct struct {
start string
end string
field string
}
for _, field := range []string{"percentDate", "isoDate"} {
testQueries := []testStruct{
{
start: "2001/08/20 6:00PM",
end: "2001/08/20 6:20PM",
field: field,
},
{
start: "20/08/2001 18:00:00",
end: "20/08/2001 18:20:00",
field: field,
},
{
start: "2001-08-20T18:00:00Z",
end: "2001-08-20T18:20:00Z",
field: field,
},
{
start: "20 August 01 6PM UTC",
end: "20 August 01 7PM UTC",
field: field,
},
{
start: "2001; Aug 20 (Mon) 06:00:00.00000pm +0000",
end: "2001; Aug 20 (Mon) 06:20:20.00000pm +0000",
field: field,
},
}
includeStart := true
includeEnd := true
for _, dtq := range testQueries {
drq := NewDateRangeInclusiveStringQuery(dtq.start, dtq.end, &includeStart, &includeEnd)
drq.SetField(dtq.field)
drq.SetDateTimeParser(field)
sr := NewSearchRequest(drq)
res, err := idx.Search(sr)
if err != nil {
t.Fatal(err)
}
if len(res.Hits) != 5 {
t.Fatalf("expected %d hits, got %d", 5, len(res.Hits))
}
}
}
}
// roundToDecimalPlace returns num rounded to decimalPlaces digits after the
// decimal point. The value is scaled by 10^decimalPlaces, rounded with
// math.Round (halves are rounded away from zero) and scaled back.
func roundToDecimalPlace(num float64, decimalPlaces int) float64 {
	scale := math.Pow(10, float64(decimalPlaces))
	scaled := math.Round(num * scale)
	return scaled / scale
}
// TestScoreBreakdown runs disjunction queries with score breakdown retrieval
// enabled and checks, for every hit, that the per-disjunct scores match the
// expected values once both sides are rounded to three decimal places.
func TestScoreBreakdown(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	indexMapping := mapping.NewIndexMapping()
	textMapping := mapping.NewTextFieldMapping()
	textMapping.Analyzer = simple.Name
	indexMapping.DefaultMapping.AddFieldMappingsAt("text", textMapping)

	documents := map[string]map[string]interface{}{
		"doc1": {
			"text": "lorem ipsum dolor sit amet consectetur adipiscing elit do eiusmod tempor",
		},
		"doc2": {
			"text": "lorem dolor amet adipiscing sed eiusmod",
		},
		"doc3": {
			"text": "ipsum sit consectetur elit do tempor",
		},
		"doc4": {
			"text": "lorem ipsum sit amet adipiscing elit do eiusmod",
		},
	}

	idx, err := New(indexDir, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	batch := idx.NewBatch()
	for id, fields := range documents {
		if err := batch.Index(id, fields); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// expectedHit is one expected search hit. scoreBreakdown maps the
	// position of a disjunct inside the query to that disjunct's score;
	// only docID and scoreBreakdown are asserted by this test.
	type expectedHit struct {
		docID          string // doc ID of the hit
		score          float64
		scoreBreakdown map[int]float64
	}
	// breakdownCase pairs a JSON disjunction query with its expected hits,
	// listed in ascending doc ID order to match the sort applied below.
	type breakdownCase struct {
		query      string
		typ        string
		expectHits []expectedHit
	}
	cases := []breakdownCase{
		{
			// More than 10 disjuncts: meant to exercise the disjunction heap
			// searcher. The "blank" disjuncts match nothing, so their
			// positions are absent from every expected breakdown.
			query: `{"disjuncts":[{"term":"lorem","field":"text"},{"term":"blank","field":"text"},{"term":"ipsum","field":"text"},{"term":"blank","field":"text"},{"term":"blank","field":"text"},{"term":"dolor","field":"text"},{"term":"sit","field":"text"},{"term":"amet","field":"text"},{"term":"consectetur","field":"text"},{"term":"blank","field":"text"},{"term":"adipiscing","field":"text"},{"term":"blank","field":"text"},{"term":"elit","field":"text"},{"term":"sed","field":"text"},{"term":"do","field":"text"},{"term":"eiusmod","field":"text"},{"term":"tempor","field":"text"},{"term":"blank","field":"text"},{"term":"blank","field":"text"}]}`,
			typ:   "disjunction",
			expectHits: []expectedHit{
				{
					docID:          "doc1",
					score:          0.3034548543819603,
					scoreBreakdown: map[int]float64{0: 0.040398807605268316, 2: 0.040398807605268316, 5: 0.0669862776967768, 6: 0.040398807605268316, 7: 0.040398807605268316, 8: 0.0669862776967768, 10: 0.040398807605268316, 12: 0.040398807605268316, 14: 0.040398807605268316, 15: 0.040398807605268316, 16: 0.0669862776967768},
				},
				{
					docID:          "doc2",
					score:          0.14725661652397853,
					scoreBreakdown: map[int]float64{0: 0.05470024557900147, 5: 0.09069985124905133, 7: 0.05470024557900147, 10: 0.05470024557900147, 13: 0.15681178542754148, 15: 0.05470024557900147},
				},
				{
					docID:          "doc3",
					score:          0.12637916362550797,
					scoreBreakdown: map[int]float64{2: 0.05470024557900147, 6: 0.05470024557900147, 8: 0.09069985124905133, 12: 0.05470024557900147, 14: 0.05470024557900147, 16: 0.09069985124905133},
				},
				{
					docID:          "doc4",
					score:          0.15956816751152955,
					scoreBreakdown: map[int]float64{0: 0.04737179972998534, 2: 0.04737179972998534, 6: 0.04737179972998534, 7: 0.04737179972998534, 10: 0.04737179972998534, 12: 0.04737179972998534, 14: 0.04737179972998534, 15: 0.04737179972998534},
				},
			},
		},
		{
			// Fewer than 10 disjuncts: meant to exercise the disjunction
			// slice searcher. Again the "blank" disjuncts contribute nothing.
			query: `{"disjuncts":[{"term":"blank","field":"text"},{"term":"lorem","field":"text"},{"term":"ipsum","field":"text"},{"term":"blank","field":"text"},{"term":"blank","field":"text"},{"term":"dolor","field":"text"},{"term":"sit","field":"text"},{"term":"blank","field":"text"}]}`,
			typ:   "disjunction",
			expectHits: []expectedHit{
				{
					docID:          "doc1",
					score:          0.1340684440934241,
					scoreBreakdown: map[int]float64{1: 0.05756326446708409, 2: 0.05756326446708409, 5: 0.09544709478559595, 6: 0.05756326446708409},
				},
				{
					docID:          "doc2",
					score:          0.05179425287147191,
					scoreBreakdown: map[int]float64{1: 0.0779410306721006, 5: 0.129235980813787},
				},
				{
					docID:          "doc3",
					score:          0.0389705153360503,
					scoreBreakdown: map[int]float64{2: 0.0779410306721006, 6: 0.0779410306721006},
				},
				{
					docID:          "doc4",
					score:          0.07593627256602972,
					scoreBreakdown: map[int]float64{1: 0.06749890894758198, 2: 0.06749890894758198, 6: 0.06749890894758198},
				},
			},
		},
	}

	for _, tc := range cases {
		var disjunction query.DisjunctionQuery
		if err := json.Unmarshal([]byte(tc.query), &disjunction); err != nil {
			t.Fatal(err)
		}
		disjunction.RetrieveScoreBreakdown(true)

		request := NewSearchRequest(&disjunction)
		// Sort by ID so hits line up with the expectation tables above.
		request.SortBy([]string{"_id"})
		request.Explain = true

		result, err := idx.Search(request)
		if err != nil {
			t.Fatal(err)
		}
		if got, want := len(result.Hits), len(tc.expectHits); got != want {
			t.Fatalf("expected %d hits, got %d", want, got)
		}
		for pos, hit := range result.Hits {
			want := tc.expectHits[pos]
			if hit.ID != want.docID {
				t.Fatalf("expected docID %s, got %s", want.docID, hit.ID)
			}
			if len(hit.ScoreBreakdown) != len(want.scoreBreakdown) {
				t.Fatalf("expected %d score breakdown, got %d", len(want.scoreBreakdown), len(hit.ScoreBreakdown))
			}
			// A disjunct index missing from the expected map reads as zero,
			// which cannot equal a non-zero actual score after rounding.
			for disjunct, actual := range hit.ScoreBreakdown {
				expected := want.scoreBreakdown[disjunct]
				if roundToDecimalPlace(actual, 3) != roundToDecimalPlace(expected, 3) {
					t.Fatalf("expected score breakdown %f, got %f", expected, actual)
				}
			}
		}
	}
}
// TestAutoFuzzy compares match and match_phrase queries that use a fixed
// fuzziness of 2 against the same queries using "auto" fuzziness, checking
// the exact, ordered list of matching product IDs for each.
func TestAutoFuzzy(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	indexMapping := mapping.NewIndexMapping()

	// Custom analyzer: whitespace tokenizer followed by lowercasing.
	analyzerConfig := map[string]interface{}{
		"type":          custom.Name,
		"tokenizer":     whitespace.Name,
		"token_filters": []interface{}{lowercase.Name},
	}
	if err := indexMapping.AddCustomAnalyzer("splitter", analyzerConfig); err != nil {
		t.Fatal(err)
	}

	modelMapping := mapping.NewTextFieldMapping()
	modelMapping.Analyzer = "splitter"
	modelMapping.Store = true
	modelMapping.IncludeTermVectors = true
	modelMapping.IncludeInAll = true

	// Only the explicitly mapped "model" field is part of the mapping.
	indexMapping.DefaultMapping.Dynamic = false
	indexMapping.DefaultMapping.AddFieldMappingsAt("model", modelMapping)

	documents := map[string]map[string]interface{}{
		"product1": {
			"model": "apple iphone 12",
		},
		"product2": {
			"model": "apple iphone 13",
		},
		"product3": {
			"model": "samsung galaxy s22",
		},
		"product4": {
			"model": "samsung galaxy note",
		},
		"product5": {
			"model": "google pixel 5",
		},
		"product6": {
			"model": "oneplus 9 pro",
		},
		"product7": {
			"model": "xiaomi mi 11",
		},
		"product8": {
			"model": "oppo find x3",
		},
		"product9": {
			"model": "vivo x60 pro",
		},
		"product10": {
			"model": "oneplus 8t pro",
		},
		"product11": {
			"model": "nokia xr20",
		},
		"product12": {
			"model": "poco f1",
		},
		"product13": {
			"model": "asus rog 5",
		},
		"product14": {
			"model": "samsung galaxy a15 5g",
		},
		"product15": {
			"model": "tecno camon 17",
		},
	}

	idx, err := New(indexDir, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	batch := idx.NewBatch()
	for id, fields := range documents {
		if err := batch.Index(id, fields); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	// fuzzyCase is a JSON query plus the product IDs it must return, listed
	// in result order (descending score, then ascending ID).
	type fuzzyCase struct {
		query      string
		expectHits []string
	}
	cases := []fuzzyCase{
		{
			// match query, fuzziness fixed at 2
			query: `{
"match" : "applle iphone 12",
"fuzziness": 2,
"field" : "model"
}`,
			expectHits: []string{"product1", "product2", "product7", "product14", "product15", "product12", "product10", "product3", "product6", "product8"},
		},
		{
			// match query, fuzziness "auto"
			query: `{
"match" : "applle iphone 12",
"fuzziness": "auto",
"field" : "model"
}`,
			expectHits: []string{"product1", "product2"},
		},
		{
			// match query, fuzziness fixed at 2, "and" operator
			query: `{
"match" : "applle iphone 12",
"fuzziness": 2,
"field" : "model",
"operator": "and"
}`,
			expectHits: []string{"product1", "product2"},
		},
		{
			// match query, fuzziness "auto", "and" operator
			query: `{
"match" : "applle iphone 12",
"fuzziness": "auto",
"field" : "model",
"operator": "and"
}`,
			expectHits: []string{"product1"},
		},
		{
			// match phrase query, fuzziness fixed at 2
			query: `{
"match_phrase" : "onplus 9 pro",
"fuzziness": 2,
"field" : "model"
}`,
			expectHits: []string{"product6", "product10"},
		},
		{
			// match phrase query, fuzziness "auto"
			query: `{
"match_phrase" : "onplus 9 pro",
"fuzziness": "auto",
"field" : "model"
}`,
			expectHits: []string{"product6"},
		},
	}

	for _, tc := range cases {
		parsed, err := query.ParseQuery([]byte(tc.query))
		if err != nil {
			t.Fatal(err)
		}

		request := NewSearchRequest(parsed)
		request.Highlight = NewHighlightWithStyle(ansi.Name)
		// Score first, ID as tie-breaker, so the hit order is deterministic.
		request.SortBy([]string{"-_score", "_id"})
		request.Fields = []string{"*"}
		request.Explain = true

		result, err := idx.Search(request)
		if err != nil {
			t.Fatal(err)
		}
		if got, want := len(result.Hits), len(tc.expectHits); got != want {
			t.Fatalf("expected %d hits, got %d", want, got)
		}
		for pos, hit := range result.Hits {
			if wantID := tc.expectHits[pos]; hit.ID != wantID {
				t.Fatalf("expected docID %s, got %s", wantID, hit.ID)
			}
		}
	}
}
// TestThesaurusTermReader indexes regular documents together with synonym
// definitions and then reads the synonyms of individual terms straight from
// the index reader, comparing them (order-insensitively) with the expected
// sets.
func TestThesaurusTermReader(t *testing.T) {
	indexDir := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, indexDir)

	const (
		collectionName = "collection1"
		sourceName     = "english"
	)
	analyzerName := simple.Name

	sourceConfig := map[string]interface{}{
		"collection": collectionName,
		"analyzer":   analyzerName,
	}

	// The text field uses the same analyzer as the synonym source and is
	// linked to that source by name.
	textMapping := mapping.NewTextFieldMapping()
	textMapping.Analyzer = analyzerName
	textMapping.SynonymSource = sourceName

	indexMapping := mapping.NewIndexMapping()
	indexMapping.DefaultMapping.AddFieldMappingsAt("text", textMapping)
	if err := indexMapping.AddSynonymSource(sourceName, sourceConfig); err != nil {
		t.Fatal(err)
	}
	if err := indexMapping.Validate(); err != nil {
		t.Fatal(err)
	}

	idx, err := New(indexDir, indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := idx.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	documents := map[string]map[string]interface{}{
		"doc1": {
			"text": "quick brown fox eats",
		},
		"doc2": {
			"text": "fast red wolf jumps",
		},
		"doc3": {
			"text": "quick red cat runs",
		},
		"doc4": {
			"text": "speedy brown dog barks",
		},
		"doc5": {
			"text": "fast green rabbit hops",
		},
	}

	// Documents and synonym definitions share a single batch.
	batch := idx.NewBatch()
	for id, fields := range documents {
		if err := batch.Index(id, fields); err != nil {
			t.Fatal(err)
		}
	}

	// Definitions without Input list terms that are synonyms of one another;
	// definitions with Input map only the input terms to the synonyms (the
	// expectations for "red" and "fox" below rely on that).
	synonymDefs := map[string]*SynonymDefinition{
		"synDoc1": {
			Synonyms: []string{"quick", "fast", "speedy"},
		},
		"synDoc2": {
			Input:    []string{"color", "colour"},
			Synonyms: []string{"red", "green", "blue", "yellow", "brown"},
		},
		"synDoc3": {
			Input:    []string{"animal", "creature"},
			Synonyms: []string{"fox", "wolf", "cat", "dog", "rabbit"},
		},
		"synDoc4": {
			Synonyms: []string{"eats", "jumps", "runs", "barks", "hops"},
		},
	}
	for name, def := range synonymDefs {
		if err := batch.IndexSynonym(name, collectionName, def); err != nil {
			t.Fatal(err)
		}
	}
	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	advanced, err := idx.Advanced()
	if err != nil {
		t.Fatal(err)
	}
	reader, err := advanced.Reader()
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if closeErr := reader.Close(); closeErr != nil {
			t.Fatal(closeErr)
		}
	}()

	thesaurus, isThesaurus := reader.(index.ThesaurusReader)
	if !isThesaurus {
		t.Fatal("expected thesaurus reader")
	}

	// lookupCase is a term to look up and the synonyms expected for it.
	type lookupCase struct {
		queryTerm        string
		expectedSynonyms []string
	}
	cases := []lookupCase{
		{
			queryTerm:        "quick",
			expectedSynonyms: []string{"fast", "speedy"},
		},
		{
			queryTerm:        "red",
			expectedSynonyms: []string{},
		},
		{
			queryTerm:        "color",
			expectedSynonyms: []string{"red", "green", "blue", "yellow", "brown"},
		},
		{
			queryTerm:        "colour",
			expectedSynonyms: []string{"red", "green", "blue", "yellow", "brown"},
		},
		{
			queryTerm:        "animal",
			expectedSynonyms: []string{"fox", "wolf", "cat", "dog", "rabbit"},
		},
		{
			queryTerm:        "creature",
			expectedSynonyms: []string{"fox", "wolf", "cat", "dog", "rabbit"},
		},
		{
			queryTerm:        "fox",
			expectedSynonyms: []string{},
		},
		{
			queryTerm:        "eats",
			expectedSynonyms: []string{"jumps", "runs", "barks", "hops"},
		},
		{
			queryTerm:        "jumps",
			expectedSynonyms: []string{"eats", "runs", "barks", "hops"},
		},
	}

	for _, tc := range cases {
		termReader, err := thesaurus.ThesaurusTermReader(context.Background(), sourceName, []byte(tc.queryTerm))
		if err != nil {
			t.Fatal(err)
		}

		// Drain the reader; an empty string marks the end of the synonyms.
		var found []string
		for {
			next, err := termReader.Next()
			if err != nil {
				t.Fatal(err)
			}
			if next == "" {
				break
			}
			found = append(found, string(next))
		}

		wanted := tc.expectedSynonyms
		if len(found) != len(wanted) {
			t.Fatalf("expected %d synonyms, got %d", len(wanted), len(found))
		}

		// The reader's order is not asserted, so compare sorted copies.
		sort.Strings(found)
		sort.Strings(wanted)
		for pos, got := range found {
			if got != wanted[pos] {
				t.Fatalf("expected synonym %s, got %s", wanted[pos], got)
			}
		}
	}
}
func TestSynonymSearchQueries(t *testing.T) {
tmpIndexPath := createTmpIndexPath(t)
defer cleanupTmpIndexPath(t, tmpIndexPath)
synonymCollection := "collection1"
synonymSourceName := "english"
analyzer := en.AnalyzerName
synonymSourceConfig := map[string]interface{}{
"collection": synonymCollection,
"analyzer": analyzer,
}
textField := mapping.NewTextFieldMapping()
textField.Analyzer = analyzer
textField.SynonymSource = synonymSourceName
imap := mapping.NewIndexMapping()
imap.DefaultMapping.AddFieldMappingsAt("text", textField)
err := imap.AddSynonymSource(synonymSourceName, synonymSourceConfig)
if err != nil {
t.Fatal(err)
}
err = imap.Validate()
if err != nil {
t.Fatal(err)
}
idx, err := New(tmpIndexPath, imap)
if err != nil {
t.Fatal(err)
}
defer func() {
err = idx.Close()
if err != nil {
t.Fatal(err)
}
}()
documents := map[string]map[string]interface{}{
"doc1": {
"text": `The hardworking employee consistently strives to exceed expectations.
His industrious nature makes him a valuable asset to any team.
His conscientious attention to detail ensures that projects are completed efficiently and accurately.
He remains persistent even in the face of challenges.`,
},
"doc2": {
"text": `The tranquil surroundings of the retreat provide a perfect escape from the hustle and bustle of city life.
Guests enjoy the peaceful atmosphere, which is perfect for relaxation and rejuvenation.
The calm environment offers the ideal place to meditate and connect with nature.
Even the most stressed individuals find themselves feeling relaxed and at ease.`,
},
"doc3": {
"text": `The house was burned down, leaving only a charred shell behind.
The intense heat of the flames caused the walls to warp and the roof to cave in.
The seared remains of the furniture told the story of the blaze.
The incinerated remains left little more than ashes to remember what once was.`,
},
"doc4": {
"text": `The faithful dog followed its owner everywhere, always loyal and steadfast.
It was devoted to protecting its family, and its reliable nature meant it could always be trusted.
In the face of danger, the dog remained calm, knowing its role was to stay vigilant.
Its trustworthy companionship provided comfort and security.`,
},
"doc5": {
"text": `The lively market is bustling with activity from morning to night.
The dynamic energy of the crowd fills the air as vendors sell their wares.
Shoppers wander from stall to stall, captivated by the vibrant colors and energetic atmosphere.
This place is alive with movement and life.`,
},
"doc6": {
"text": `In moments of crisis, bravery shines through.
It takes valor to step forward when others are afraid to act.
Heroes are defined by their guts and nerve, taking risks to protect others.
Boldness in the face of danger is what sets them apart.`,
},
"doc7": {
"text": `Innovation is the driving force behind progress in every industry.
The company fosters an environment of invention, encouraging creativity at every level.
The focus on novelty and improvement means that ideas are always evolving.
The development of new solutions is at the core of the company's mission.`,
},
"doc8": {
"text": `The blazing sunset cast a radiant glow over the horizon, painting the sky with hues of red and orange.
The intense heat of the day gave way to a fiery display of color.
As the sun set, the glowing light illuminated the landscape, creating a breathtaking scene.
The fiery sky was a sight to behold.`,
},
"doc9": {
"text": `The fertile soil of the valley makes it perfect for farming.
The productive land yields abundant crops year after year.
Farmers rely on the rich, fruitful ground to sustain their livelihoods.
The area is known for its plentiful harvests, supporting both local communities and export markets.`,
},
"doc10": {
"text": `The arid desert is a vast, dry expanse with little water or vegetation.
The barren landscape stretches as far as the eye can see, offering little respite from the scorching sun.
The desolate environment is unforgiving to those who venture too far without preparation.
The parched earth cracks under the heat, creating a harsh, unyielding terrain.`,
},
"doc11": {
"text": `The fox is known for its cunning and intelligence.
As a predator, it relies on its sharp instincts to outwit its prey.
Its vulpine nature makes it both mysterious and fascinating.
The fox's ability to hunt with precision and stealth is what makes it such a formidable hunter.`,
},
"doc12": {
"text": `The dog is often considered man's best friend due to its loyal nature.
As a companion, the hound provides both protection and affection.
The puppy quickly becomes a member of the family, always by your side.
Its playful energy and unshakable loyalty make it a beloved pet.`,
},
"doc13": {
"text": `He worked tirelessly through the night, always persistent in his efforts.
His industrious approach to problem-solving kept the project moving forward.
No matter how difficult the task, he remained focused, always giving his best.
His dedication paid off when the project was completed ahead of schedule.`,
},
"doc14": {
"text": `The river flowed calmly through the valley, its peaceful current offering a sense of tranquility.
Fishermen relaxed by the banks, enjoying the calm waters that reflected the sky above.
The tranquil nature of the river made it a perfect spot for meditation.
As the day ended, the river's quiet flow brought a sense of peace.`,
},
"doc15": {
"text": `After the fire, all that was left was the charred remains of what once was.
The seared walls of the house told a tragic story.
The intensity of the blaze had burned everything in its path, leaving only the smoldering wreckage behind.
The incinerated objects could not be salvaged, and the damage was beyond repair.`,
},
"doc16": {
"text": `The devoted employee always went above and beyond to complete his tasks.
His steadfast commitment to the company made him a valuable team member.
He was reliable, never failing to meet deadlines.
His trustworthiness earned him the respect of his colleagues, and was considered an
ingenious expert in his field.`,
},
"doc17": {
"text": `The city is vibrant, full of life and energy.
The dynamic pace of the streets reflects the diverse culture of its inhabitants.
People from all walks of life contribute to the energetic atmosphere.
The city's lively spirit can be felt in every corner, from the bustling markets to the lively festivals.`,
},
"doc18": {
"text": `In a moment of uncertainty, he made a bold decision that would change his life forever.
It took courage and nerve to take the leap, but his bravery paid off.
The guts to face the unknown allowed him to achieve something remarkable.
Being an bright scholar, the skill he demonstrated inspired those around him.`,
},
"doc19": {
"text": `Innovation is often born from necessity, and the lightbulb is a prime example.
Thomas Edison's invention changed the world, offering a new way to see the night.
The creativity involved in developing such a groundbreaking product sparked a wave of
novelty in the scientific community. This improvement in technology continues to shape the modern world.
He was a clever academic and a smart researcher.`,
},
"doc20": {
"text": `The fiery volcano erupted with a force that shook the earth. Its radiant lava flowed down the sides,
illuminating the night sky. The intense heat from the eruption could be felt miles away, as the
glowing lava burned everything in its path. The fiery display was both terrifying and mesmerizing.`,
},
}
synonymDocuments := map[string]*SynonymDefinition{
"synDoc1": {
Synonyms: []string{"hardworking", "industrious", "conscientious", "persistent", "focused", "devoted"},
},
"synDoc2": {
Synonyms: []string{"tranquil", "peaceful", "calm", "relaxed", "unruffled"},
},
"synDoc3": {
Synonyms: []string{"burned", "charred", "seared", "incinerated", "singed"},
},
"synDoc4": {
Synonyms: []string{"faithful", "steadfast", "devoted", "reliable", "trustworthy"},
},
"synDoc5": {
Synonyms: []string{"lively", "dynamic", "energetic", "vivid", "vibrating"},
},
"synDoc6": {
Synonyms: []string{"bravery", "valor", "guts", "nerve", "boldness"},
},
"synDoc7": {
Input: []string{"innovation"},
Synonyms: []string{"invention", "creativity", "novelty", "improvement", "development"},
},
"synDoc8": {
Input: []string{"blazing"},
Synonyms: []string{"intense", "radiant", "burning", "fiery", "glowing"},
},
"synDoc9": {
Input: []string{"fertile"},
Synonyms: []string{"productive", "fruitful", "rich", "abundant", "plentiful"},
},
"synDoc10": {
Input: []string{"arid"},
Synonyms: []string{"dry", "barren", "desolate", "parched", "unfertile"},
},
"synDoc11": {
Input: []string{"fox"},
Synonyms: []string{"vulpine", "canine", "predator", "hunter", "pursuer"},
},
"synDoc12": {
Input: []string{"dog"},
Synonyms: []string{"canine", "hound", "puppy", "pup", "companion"},
},
"synDoc13": {
Synonyms: []string{"researcher", "scientist", "scholar", "academic", "expert"},
},
"synDoc14": {
Synonyms: []string{"bright", "clever", "ingenious", "sharp", "astute", "smart"},
},
}
// Combine both maps into a slice of map entries (as they both have similar structure)
var combinedDocIDs []string
for id := range synonymDocuments {
combinedDocIDs = append(combinedDocIDs, id)
}
for id := range documents {
combinedDocIDs = append(combinedDocIDs, id)
}
rand.Shuffle(len(combinedDocIDs), func(i, j int) {
combinedDocIDs[i], combinedDocIDs[j] = combinedDocIDs[j], combinedDocIDs[i]
})
// Function to create batches of 5
createDocBatches := func(docs []string, batchSize int) [][]string {
var batches [][]string
for i := 0; i < len(docs); i += batchSize {
end := i + batchSize
if end > len(docs) {
end = len(docs)
}
batches = append(batches, docs[i:end])
}
return batches
}
// Create batches of 5 documents
batchSize := 5
docBatches := createDocBatches(combinedDocIDs, batchSize)
if len(docBatches) == 0 {
t.Fatal("expected batches")
}
totalDocs := 0
for _, batch := range docBatches {
totalDocs += len(batch)
}
if totalDocs != len(combinedDocIDs) {
t.Fatalf("expected %d documents, got %d", len(combinedDocIDs), totalDocs)
}
var batches []*Batch
for _, docBatch := range docBatches {
batch := idx.NewBatch()
for _, docID := range docBatch {
if synDef, ok := synonymDocuments[docID]; ok {
err := batch.IndexSynonym(docID, synonymCollection, synDef)
if err != nil {
t.Fatal(err)
}
} else {
err := batch.Index(docID, documents[docID])
if err != nil {
t.Fatal(err)
}
}
}
batches = append(batches, batch)
}
for _, batch := range batches {
err = idx.Batch(batch)
if err != nil {
t.Fatal(err)
}
}
type testStruct struct {
query string
expectHits []string
}
testQueries := []testStruct{
{
query: `{
"match": "hardworking employee",
"field": "text"
}`,
expectHits: []string{"doc1", "doc13", "doc16", "doc4", "doc7"},
},
{
query: `{
"match": "Hardwork and industrius efforts bring lovely and tranqual moments, with a glazing blow of valour.",
"field": "text",
"fuzziness": "auto"
}`,
expectHits: []string{
"doc1", "doc13", "doc14", "doc15", "doc16",
"doc17", "doc18", "doc2", "doc20", "doc3",
"doc4", "doc5", "doc6", "doc7", "doc8", "doc9",
},
},
{
query: `{
"prefix": "in",
"field": "text"
}`,
expectHits: []string{
"doc1", "doc11", "doc13", "doc15", "doc16",
"doc17", "doc18", "doc19", "doc2", "doc20",
"doc3", "doc4", "doc7", "doc8",
},
},
{
query: `{
"prefix": "vivid",
"field": "text"
}`,
expectHits: []string{
"doc17", "doc5",
},
},
{
query: `{
"match_phrase": "smart academic",
"field": "text"
}`,
expectHits: []string{"doc16", "doc18", "doc19"},
},
{
query: `{
"match_phrase": "smrat acedemic",
"field": "text",
"fuzziness": "auto"
}`,
expectHits: []string{"doc16", "doc18", "doc19"},
},
{
query: `{
"wildcard": "br*",
"field": "text"
}`,
expectHits: []string{"doc11", "doc14", "doc16", "doc18", "doc19", "doc6", "doc8"},
},
}
getTotalSynonymSearchStat := func(idx Index) int {
ir, err := idx.Advanced()
if err != nil {
t.Fatal(err)
}
stat := ir.StatsMap()["synonym_searches"].(uint64)
return int(stat)
}
runTestQueries := func(idx Index) error {
for _, dtq := range testQueries {
q, err := query.ParseQuery([]byte(dtq.query))
if err != nil {
return err
}
sr := NewSearchRequest(q)
sr.Highlight = NewHighlightWithStyle(ansi.Name)
sr.SortBy([]string{"_id"})
sr.Fields = []string{"*"}
sr.Size = 30
sr.Explain = true
res, err := idx.Search(sr)
if err != nil {
return err
}
if len(res.Hits) != len(dtq.expectHits) {
return fmt.Errorf("expected %d hits, got %d", len(dtq.expectHits), len(res.Hits))
}
// sort the expected hits to match the order of the search results
sort.Strings(dtq.expectHits)
for i, hit := range res.Hits {
if hit.ID != dtq.expectHits[i] {
return fmt.Errorf("expected docID %s, got %s", dtq.expectHits[i], hit.ID)
}
}
}
return nil
}
err = runTestQueries(idx)
if err != nil {
t.Fatal(err)
}
// now verify that the stat for number of synonym enabled queries is correct
totalSynonymSearchStat := getTotalSynonymSearchStat(idx)
if totalSynonymSearchStat != len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
// test with index alias - with 1 batch per index
numIndexes := len(batches)
indexes := make([]Index, numIndexes)
indexesPath := make([]string, numIndexes)
for i := 0; i < numIndexes; i++ {
tmpIndexPath := createTmpIndexPath(t)
idx, err := New(tmpIndexPath, imap)
if err != nil {
t.Fatal(err)
}
err = idx.Batch(batches[i])
if err != nil {
t.Fatal(err)
}
indexes[i] = idx
indexesPath[i] = tmpIndexPath
}
defer func() {
for i := 0; i < numIndexes; i++ {
err = indexes[i].Close()
if err != nil {
t.Fatal(err)
}
cleanupTmpIndexPath(t, indexesPath[i])
}
}()
alias := NewIndexAlias(indexes...)
if err := alias.SetIndexMapping(imap); err != nil {
t.Fatal(err)
}
err = runTestQueries(alias)
if err != nil {
t.Fatal(err)
}
// verify the synonym search stat for the alias
totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0])
if totalSynonymSearchStat != len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
for i := 1; i < numIndexes; i++ {
idxStat := getTotalSynonymSearchStat(indexes[i])
if idxStat != totalSynonymSearchStat {
t.Fatalf("expected %d synonym searches, got %d", totalSynonymSearchStat, idxStat)
}
}
if totalSynonymSearchStat != len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
// test with multi-level alias now with two index per alias
// and having any extra index being in the final alias
numAliases := numIndexes / 2
extraIndex := numIndexes % 2
aliases := make([]IndexAlias, numAliases)
for i := 0; i < numAliases; i++ {
alias := NewIndexAlias(indexes[i*2], indexes[i*2+1])
aliases[i] = alias
}
if extraIndex > 0 {
aliases[numAliases-1].Add(indexes[numIndexes-1])
}
alias = NewIndexAlias()
if err := alias.SetIndexMapping(imap); err != nil {
t.Fatal(err)
}
for i := 0; i < numAliases; i++ {
alias.Add(aliases[i])
}
err = runTestQueries(alias)
if err != nil {
t.Fatal(err)
}
// verify the synonym searches stat for the alias
totalSynonymSearchStat = getTotalSynonymSearchStat(indexes[0])
if totalSynonymSearchStat != 2*len(testQueries) {
t.Fatalf("expected %d synonym searches, got %d", len(testQueries), totalSynonymSearchStat)
}
for i := 1; i < numIndexes; i++ {
idxStat := getTotalSynonymSearchStat(indexes[i])
if idxStat != totalSynonymSearchStat {
t.Fatalf("expected %d synonym searches, got %d", totalSynonymSearchStat, idxStat)
}
}
}
// TestGeoDistanceInSort indexes three geo points into a single index, runs a
// geo distance query around the query point sorted by ascending distance, and
// checks that every hit's decoded sort value is within one meter of the
// expected distance.
func TestGeoDistanceInSort(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath)

	fm := mapping.NewGeoPointFieldMapping()
	imap := mapping.NewIndexMapping()
	imap.DefaultMapping.AddFieldMappingsAt("geo", fm)
	idx, err := New(tmpIndexPath, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// Query point; the same values are used as the sort origin below.
	qp := []float64{0, 0}

	// Documents are listed in the order the ascending sort should return
	// them. The expected distance is geo.Haversin scaled by 1000
	// (presumably km to m, matching the "m" sort unit).
	docs := []struct {
		id       string
		point    []float64
		distance float64
	}{
		{
			id:       "1",
			point:    []float64{1, 1},
			distance: geo.Haversin(1, 1, qp[0], qp[1]) * 1000,
		},
		{
			id:       "2",
			point:    []float64{2, 2},
			distance: geo.Haversin(2, 2, qp[0], qp[1]) * 1000,
		},
		{
			id:       "3",
			point:    []float64{3, 3},
			distance: geo.Haversin(3, 3, qp[0], qp[1]) * 1000,
		},
	}
	for _, doc := range docs {
		if err := idx.Index(doc.id, map[string]interface{}{"geo": doc.point}); err != nil {
			t.Fatal(err)
		}
	}

	q := NewGeoDistanceQuery(qp[0], qp[1], "1000000m")
	q.SetField("geo")
	req := NewSearchRequest(q)
	req.Sort = search.SortOrder{
		&search.SortGeoDistance{
			Field: "geo",
			Desc:  false,
			Unit:  "m",
			Lon:   qp[0],
			Lat:   qp[1],
		},
	}
	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// Without this check the loop below would pass vacuously on an empty
	// result and index out of range on an oversized one.
	if len(res.Hits) != len(docs) {
		t.Fatalf("expected %d hits, got %d", len(docs), len(res.Hits))
	}
	for i, hit := range res.Hits {
		hitDist, err := strconv.ParseFloat(hit.DecodedSort[0], 64)
		if err != nil {
			t.Fatal(err)
		}
		if math.Abs(hitDist-docs[i].distance) > 1 {
			t.Fatalf("distance error greater than 1 meter, expected distance - %v, got - %v", docs[i].distance, hitDist)
		}
	}
}
// TestFilteredBooleanQuery checks how a boolean query treats the same
// sub-query when it is added as a Must clause versus a Filter clause: both
// restrict the hits, but this test expects only the Must clause to change
// the scores relative to the unfiltered query.
func TestFilteredBooleanQuery(t *testing.T) {
	tmpIndexPath := createTmpIndexPath(t)

	imap := mapping.NewIndexMapping()
	genreMapping := mapping.NewTextFieldMapping()
	genreMapping.Analyzer = keyword.Name
	authorMapping := mapping.NewTextFieldMapping()
	authorMapping.Analyzer = keyword.Name
	titleMapping := mapping.NewTextFieldMapping()
	titleMapping.Analyzer = en.AnalyzerName
	tagsMapping := mapping.NewNumericFieldMapping()
	tagsMapping.Store = false
	tagsMapping.IncludeInAll = false
	priceMapping := mapping.NewNumericFieldMapping()
	imap.DefaultMapping.AddFieldMappingsAt("genre", genreMapping)
	imap.DefaultMapping.AddFieldMappingsAt("author", authorMapping)
	imap.DefaultMapping.AddFieldMappingsAt("title", titleMapping)
	imap.DefaultMapping.AddFieldMappingsAt("price", priceMapping)
	imap.DefaultMapping.AddFieldMappingsAt("tags", tagsMapping)

	idx, err := New(tmpIndexPath, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		// Report a close failure without aborting the deferred function so
		// the index directory is still removed afterwards.
		if err := idx.Close(); err != nil {
			t.Error(err)
		}
		if err := os.RemoveAll(tmpIndexPath); err != nil {
			t.Fatal(err)
		}
	}()

	// Book dataset
	docs := []map[string]interface{}{
		{
			"title":  "The Catcher in the Rye",
			"author": "J.D. Salinger",
			"genre":  "fiction",
			"price":  9.99,
			"tags":   []int{1, 2, 3},
		},
		{
			"title":  "Sapiens",
			"author": "Yuval Noah Harari",
			"genre":  "non-fiction",
			"price":  14.29,
			"tags":   []int{2},
		},
		{
			"title":  "To Kill a Mockingbird",
			"author": "Harper Lee",
			"genre":  "fiction",
			"price":  12,
			"tags":   []int{},
		},
		{
			"title":  "The Power of Habit",
			"author": "Charles Duhigg",
			"genre":  "self-help",
			"price":  26,
			"tags":   []int{1, 2},
		},
		{
			"title":  "The Great Gatsby",
			"author": "F. Scott Fitzgerald",
			"genre":  "fiction",
			"price":  22,
			"tags":   []int{1, 2},
		},
		{
			"title":  "Atomic Habits",
			"author": "James Clear",
			"genre":  "self-help",
			"price":  15,
			"tags":   []int{3},
		},
		{
			"title":  "Educated",
			"author": "Tara Westover",
			"genre":  "non-fiction",
			"price":  18,
		},
		{
			"title":  "1984",
			"author": "George Orwell",
			"genre":  "fiction",
			"price":  20,
		},
	}
	b := idx.NewBatch()
	for i, doc := range docs {
		if err := b.Index(strconv.Itoa(i), doc); err != nil {
			t.Fatal(err)
		}
	}
	// execute the batch
	if err := idx.Batch(b); err != nil {
		t.Fatal(err)
	}

	// Suppose the user is interested in books in the fiction genre
	// and is only interested in books within their budget of 20
	fictionQuery := NewTermQuery("fiction")
	fictionQuery.SetField("genre")

	// A numeric range query for books with a price less than or equal to 20
	maxPrice := float64(20)
	maxInclusive := true
	priceFilterQuery := NewNumericRangeQuery(nil, &maxPrice)
	priceFilterQuery.InclusiveMax = &maxInclusive
	priceFilterQuery.SetField("price")

	// All searches below are sorted by price in ascending order, which
	// fixes the order in which the titles are expected.
	priceSort := &search.SortField{
		Field: "price",
		Desc:  false,
	}

	// An unfiltered boolean query requesting all books in the fiction genre
	// All 4 books in the fiction genre should be returned with the
	// same score as they are all in the same genre
	q := NewBooleanQuery()
	q.AddMust(fictionQuery)
	req := NewSearchRequest(q)
	req.Explain = true
	req.Fields = []string{"title"}
	req.Sort = search.SortOrder{priceSort}
	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if len(res.Hits) != 4 {
		t.Fatalf("expected 4 hits, got %d", len(res.Hits))
	}
	// Verify the results are in the correct (price ascending) order
	expectedTitleOrder := []string{
		"The Catcher in the Rye",
		"To Kill a Mockingbird",
		"1984",
		"The Great Gatsby",
	}
	for i, doc := range res.Hits {
		if doc.Fields["title"] != expectedTitleOrder[i] {
			t.Fatalf("expected title %s, got %s", expectedTitleOrder[i], doc.Fields["title"])
		}
	}
	// Ensure that the scores are the same for all documents
	unfilteredScore := res.Hits[0].Score
	for i := 1; i < len(res.Hits); i++ {
		if res.Hits[i].Score != unfilteredScore {
			t.Fatalf("expected score %f, got %f", unfilteredScore, res.Hits[i].Score)
		}
	}

	// A filtered boolean query requesting all books satisfying the
	// fictionQuery and the priceFilterQuery
	// But the filter query is in the Must clause
	q = NewBooleanQuery()
	q.AddMust(fictionQuery)
	q.AddMust(priceFilterQuery)
	req = NewSearchRequest(q)
	req.Explain = true
	req.Fields = []string{"title"}
	req.Sort = search.SortOrder{priceSort}
	res, err = idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	// here every score must differ from the unfiltered boolean query's
	// score, as the price filter is applied in the Must clause
	if len(res.Hits) != 3 {
		t.Fatalf("expected 3 hits, got %d", len(res.Hits))
	}
	// Verify the results are in the correct (price ascending) order
	expectedTitleOrder = []string{
		"The Catcher in the Rye",
		"To Kill a Mockingbird",
		"1984",
	}
	for i, doc := range res.Hits {
		if doc.Fields["title"] != expectedTitleOrder[i] {
			t.Fatalf("expected title %s, got %s", expectedTitleOrder[i], doc.Fields["title"])
		}
	}
	// Ensure that no document kept the unfiltered score
	for i := 0; i < len(res.Hits); i++ {
		if res.Hits[i].Score == unfilteredScore {
			t.Fatalf("expected different score, got %f", res.Hits[i].Score)
		}
	}

	// A filtered boolean query requesting all books satisfying the
	// fictionQuery and the priceFilterQuery
	// But the filter query is in the Filter clause
	q = NewBooleanQuery()
	q.AddMust(fictionQuery)
	q.AddFilter(priceFilterQuery)
	req = NewSearchRequest(q)
	req.Explain = true
	req.Fields = []string{"title"}
	req.Sort = search.SortOrder{priceSort}
	res, err = idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if len(res.Hits) != 3 {
		t.Fatalf("expected 3 hits, got %d", len(res.Hits))
	}
	// Verify the results are in the correct (price ascending) order
	for i, doc := range res.Hits {
		if doc.Fields["title"] != expectedTitleOrder[i] {
			t.Fatalf("expected title %s, got %s", expectedTitleOrder[i], doc.Fields["title"])
		}
	}
	// Ensure that every document has the unfiltered score
	for i := 0; i < len(res.Hits); i++ {
		if res.Hits[i].Score != unfilteredScore {
			t.Fatalf("expected score %f, got %f", unfilteredScore, res.Hits[i].Score)
		}
	}

	// A filtered boolean query requesting all books with tag value 3
	// The filter is in the Filter clause
	// Two books have tag value 3
	p := float64(3)
	incl := true
	eqlFilter := NewNumericRangeInclusiveQuery(&p, &p, &incl, &incl)
	eqlFilter.SetField("tags")
	q = NewBooleanQuery()
	q.AddFilter(eqlFilter)
	req = NewSearchRequest(q)
	req.Fields = []string{"title"}
	res, err = idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}
	if res.Hits.Len() != 2 {
		t.Fatalf("expected two hits, found '%d'", res.Hits.Len())
	}
	if res.Total != 2 {
		t.Fatalf("expected two total, found '%d'", res.Total)
	}
}
// TestGeoDistanceInSortAlias spreads three geo points over two indexes,
// searches them through an index alias sorted by distance from the query
// point, and checks that every hit's decoded sort value is within one meter
// of the expected distance.
func TestGeoDistanceInSortAlias(t *testing.T) {
	tmpIndexPath1 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath1)
	tmpIndexPath2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath2)

	fm := mapping.NewGeoPointFieldMapping()
	imap := mapping.NewIndexMapping()
	imap.DefaultMapping.AddFieldMappingsAt("geo", fm)

	idx1, err := New(tmpIndexPath1, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx1.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	idx2, err := New(tmpIndexPath2, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx2.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// Query point; the same values are used as the sort origin below.
	qp := []float64{0, 0}

	// Documents are listed in the order the sort should return them. The
	// expected distance is geo.Haversin scaled by 1000 (presumably km to m).
	docs := []struct {
		id       string
		point    []float64
		distance float64
	}{
		{
			id:       "1",
			point:    []float64{1, 1},
			distance: geo.Haversin(1, 1, qp[0], qp[1]) * 1000,
		},
		{
			id:       "2",
			point:    []float64{2, 2},
			distance: geo.Haversin(2, 2, qp[0], qp[1]) * 1000,
		},
		{
			id:       "3",
			point:    []float64{3, 3},
			distance: geo.Haversin(3, 3, qp[0], qp[1]) * 1000,
		},
	}
	// Interleave the documents across the two indexes so the alias has to
	// merge the per-index results to produce the sorted order.
	if err := idx1.Index(docs[0].id, map[string]interface{}{"geo": docs[0].point}); err != nil {
		t.Fatal(err)
	}
	if err := idx2.Index(docs[1].id, map[string]interface{}{"geo": docs[1].point}); err != nil {
		t.Fatal(err)
	}
	if err := idx1.Index(docs[2].id, map[string]interface{}{"geo": docs[2].point}); err != nil {
		t.Fatal(err)
	}

	idx := NewIndexAlias(idx1, idx2)
	q := NewGeoDistanceQuery(qp[0], qp[1], "1000000m")
	q.SetField("geo")
	req := NewSearchRequest(q)
	req.Sort = search.SortOrder{
		&search.SortGeoDistance{
			Field: "geo",
			Lon:   qp[0],
			Lat:   qp[1],
		},
	}
	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// Without this check the loop below would pass vacuously on an empty
	// result and index out of range on an oversized one.
	if len(res.Hits) != len(docs) {
		t.Fatalf("expected %d hits, got %d", len(docs), len(res.Hits))
	}
	for i, hit := range res.Hits {
		hitDist, err := strconv.ParseFloat(hit.DecodedSort[0], 64)
		if err != nil {
			t.Fatal(err)
		}
		if math.Abs(hitDist-docs[i].distance) > 1 {
			t.Fatalf("distance error greater than 1 meter, expected distance - %v, got - %v", docs[i].distance, hitDist)
		}
	}
}
// TestDateSortAlias spreads three dated documents over two indexes, searches
// them through an index alias sorted on the date field, and checks that each
// hit's decoded sort value is the expected date formatted as RFC 3339 with
// nanoseconds in UTC.
func TestDateSortAlias(t *testing.T) {
	tmpIndexPath1 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath1)
	tmpIndexPath2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath2)

	fm := mapping.NewDateTimeFieldMapping()
	imap := mapping.NewIndexMapping()
	imap.DefaultMapping.AddFieldMappingsAt("date", fm)

	idx1, err := New(tmpIndexPath1, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx1.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	idx2, err := New(tmpIndexPath2, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx2.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// Documents are listed in the order the sort should return them.
	docs := []struct {
		id   string
		date string
	}{
		{
			id:   "1",
			date: "2023-01-01",
		},
		{
			id:   "2",
			date: "2023-02-01",
		},
		{
			id:   "3",
			date: "2023-03-01",
		},
	}
	// Interleave the documents across the two indexes so the alias has to
	// merge the per-index results to produce the sorted order.
	if err := idx1.Index(docs[0].id, map[string]interface{}{"date": docs[0].date}); err != nil {
		t.Fatal(err)
	}
	if err := idx2.Index(docs[1].id, map[string]interface{}{"date": docs[1].date}); err != nil {
		t.Fatal(err)
	}
	if err := idx1.Index(docs[2].id, map[string]interface{}{"date": docs[2].date}); err != nil {
		t.Fatal(err)
	}

	idx := NewIndexAlias(idx1, idx2)
	q := query.NewMatchAllQuery()
	req := NewSearchRequest(q)
	req.Sort = search.SortOrder{
		&search.SortField{
			Field: "date",
			Type:  search.SortFieldAsDate,
		},
	}
	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// Without this check the loop below would pass vacuously on an empty
	// result and index out of range on an oversized one.
	if len(res.Hits) != len(docs) {
		t.Fatalf("expected %d hits, got %d", len(docs), len(res.Hits))
	}
	for i, hit := range res.Hits {
		expectedDate, err := time.Parse("2006-01-02", docs[i].date)
		if err != nil {
			t.Fatal(err)
		}
		expectedDateStr := expectedDate.UTC().Format(time.RFC3339Nano)
		if hit.DecodedSort[0] != expectedDateStr {
			// The expected value goes first, the observed one second.
			t.Fatalf("expected date %s, got %s", expectedDateStr, hit.DecodedSort[0])
		}
	}
}
// TestNumericSortAlias spreads three numbered documents over two indexes,
// searches them through an index alias sorted on the numeric field, and
// checks that each hit's decoded sort value parses back to the expected
// integer.
func TestNumericSortAlias(t *testing.T) {
	tmpIndexPath1 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath1)
	tmpIndexPath2 := createTmpIndexPath(t)
	defer cleanupTmpIndexPath(t, tmpIndexPath2)

	fm := mapping.NewNumericFieldMapping()
	imap := mapping.NewIndexMapping()
	imap.DefaultMapping.AddFieldMappingsAt("num", fm)

	idx1, err := New(tmpIndexPath1, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx1.Close(); err != nil {
			t.Fatal(err)
		}
	}()
	idx2, err := New(tmpIndexPath2, imap)
	if err != nil {
		t.Fatal(err)
	}
	defer func() {
		if err := idx2.Close(); err != nil {
			t.Fatal(err)
		}
	}()

	// Documents are listed in the order the sort should return them.
	docs := []struct {
		id  string
		num int
	}{
		{
			id:  "1",
			num: 10,
		},
		{
			id:  "2",
			num: 20,
		},
		{
			id:  "3",
			num: 30,
		},
	}
	// Interleave the documents across the two indexes so the alias has to
	// merge the per-index results to produce the sorted order.
	if err := idx1.Index(docs[0].id, map[string]interface{}{"num": docs[0].num}); err != nil {
		t.Fatal(err)
	}
	if err := idx2.Index(docs[1].id, map[string]interface{}{"num": docs[1].num}); err != nil {
		t.Fatal(err)
	}
	if err := idx1.Index(docs[2].id, map[string]interface{}{"num": docs[2].num}); err != nil {
		t.Fatal(err)
	}

	idx := NewIndexAlias(idx1, idx2)
	q := query.NewMatchAllQuery()
	req := NewSearchRequest(q)
	req.Sort = search.SortOrder{
		&search.SortField{
			Field: "num",
			Type:  search.SortFieldAsNumber,
		},
	}
	res, err := idx.Search(req)
	if err != nil {
		t.Fatal(err)
	}

	// Without this check the loop below would pass vacuously on an empty
	// result and index out of range on an oversized one.
	if len(res.Hits) != len(docs) {
		t.Fatalf("expected %d hits, got %d", len(docs), len(res.Hits))
	}
	for i, hit := range res.Hits {
		hitNum, err := strconv.Atoi(hit.DecodedSort[0])
		if err != nil {
			t.Fatal(err)
		}
		if hitNum != docs[i].num {
			t.Fatalf("expected num %d, got %d", docs[i].num, hitNum)
		}
	}
}
func TestSearchRequestValidatePagination(t *testing.T) {
tests := []struct {
name string
req *SearchRequest
expectErr error
}{
{
name: "invalid search after with numeric sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "num", Type: search.SortFieldAsNumber},
},
SearchAfter: []string{"not-a-number"},
},
expectErr: fmt.Errorf("invalid search after value for sort field 'num': 'not-a-number'. strconv.ParseFloat: parsing \"not-a-number\": invalid syntax"),
},
{
name: "invalid search before with numeric sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "num", Type: search.SortFieldAsNumber},
},
SearchBefore: []string{"not-a-number"},
},
expectErr: fmt.Errorf("invalid search before value for sort field 'num': 'not-a-number'. strconv.ParseFloat: parsing \"not-a-number\": invalid syntax"),
},
{
name: "invalid search after with date sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "date", Type: search.SortFieldAsDate},
},
SearchAfter: []string{"1 March 2023"},
},
expectErr: fmt.Errorf("invalid search after value for sort field 'date': '1 March 2023'. parsing time \"1 March 2023\" as \"2006-01-02T15:04:05.999999999Z07:00\": cannot parse \"1 March 2023\" as \"2006\""),
},
{
name: "invalid search before with date sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "date", Type: search.SortFieldAsDate},
},
SearchBefore: []string{"1 March 2023"},
},
expectErr: fmt.Errorf("invalid search before value for sort field 'date': '1 March 2023'. parsing time \"1 March 2023\" as \"2006-01-02T15:04:05.999999999Z07:00\": cannot parse \"1 March 2023\" as \"2006\""),
},
{
name: "invalid search after with geo distance sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortGeoDistance{Field: "geo"},
},
SearchAfter: []string{"not-a-number"},
},
expectErr: fmt.Errorf("invalid search after value for sort field 'geo': 'not-a-number'. strconv.ParseFloat: parsing \"not-a-number\": invalid syntax"),
},
{
name: "invalid search before with geo distance sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortGeoDistance{Field: "geo"},
},
SearchBefore: []string{"not-a-number"},
},
expectErr: fmt.Errorf("invalid search before value for sort field 'geo': 'not-a-number'. strconv.ParseFloat: parsing \"not-a-number\": invalid syntax"),
},
{
name: "valid search after with text sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "text", Type: search.SortFieldAsString},
},
SearchAfter: []string{"anything"},
},
expectErr: nil,
},
{
name: "valid search after with numeric sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "num", Type: search.SortFieldAsNumber},
},
SearchAfter: []string{"50.5"},
},
expectErr: nil,
},
{
name: "valid search after with date sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortField{Field: "date", Type: search.SortFieldAsDate},
},
SearchAfter: []string{time.Now().UTC().Format(time.RFC3339Nano)},
},
expectErr: nil,
},
{
name: "valid search after with geo distance sort",
req: &SearchRequest{
Query: NewMatchAllQuery(),
Sort: search.SortOrder{
&search.SortGeoDistance{Field: "geo"},
},
SearchAfter: []string{"1.234"},
},
expectErr: nil,
},
}
for _, test := range tests {
t.Run(test.name, func(t *testing.T) {
err := test.req.Validate()
if test.expectErr != nil {
if err == nil {
t.Fatalf("expected error: %v, got nil", test.expectErr)
}
if err.Error() != test.expectErr.Error() {
t.Fatalf("expected error: %v, got: %v", test.expectErr, err)
}
} else if err != nil {
t.Fatalf("expected no error, got: %v", err)
}
})
}
}
================================================
FILE: size/sizes.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package size
import (
"reflect"
)
// Sizes, in bytes, of values of common Go types on the current platform, as
// reported by reflect.Type.Size. For maps, pointers, slices and strings this
// is the size of the value itself, not of any data it refers to. All of them
// are populated by this package's init function.
var (
	SizeOfBool    int
	SizeOfFloat32 int
	SizeOfFloat64 int
	SizeOfInt     int
	SizeOfMap     int
	SizeOfPtr     int
	SizeOfSlice   int
	SizeOfString  int
	SizeOfUint8   int
	SizeOfUint16  int
	SizeOfUint32  int
	SizeOfUint64  int
)

// init measures one zero value of every supported type and records the
// result in the corresponding SizeOf* variable.
func init() {
	// sizeOf reports the storage size of v's dynamic type.
	sizeOf := func(v interface{}) int {
		return int(reflect.TypeOf(v).Size())
	}

	SizeOfBool = sizeOf(false)
	SizeOfFloat32 = sizeOf(float32(0))
	SizeOfFloat64 = sizeOf(float64(0))
	SizeOfInt = sizeOf(int(0))
	// Typed nil values still carry their static type into the interface.
	SizeOfMap = sizeOf(map[int]int(nil))
	SizeOfPtr = sizeOf((*int)(nil))
	SizeOfSlice = sizeOf([]int(nil))
	SizeOfString = sizeOf("")
	SizeOfUint8 = sizeOf(uint8(0))
	SizeOfUint16 = sizeOf(uint16(0))
	SizeOfUint32 = sizeOf(uint32(0))
	SizeOfUint64 = sizeOf(uint64(0))
}
================================================
FILE: test/integration.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"github.com/blevesearch/bleve/v2"
)
type (
	// SearchTest is one integration test case as decoded from JSON: the
	// search request to execute, the result it is expected to produce, and
	// a free-form comment describing the case.
	SearchTest struct {
		Search  *bleve.SearchRequest `json:"search"`
		Result  *bleve.SearchResult  `json:"result"`
		Comment string               `json:"comment"`
	}

	// SearchTests is an ordered list of integration test cases.
	SearchTests []*SearchTest
)
================================================
FILE: test/integration_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"encoding/json"
"flag"
"fmt"
"math"
"os"
"path/filepath"
"reflect"
"regexp"
"testing"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
// allow choosing alternate kvstores
_ "github.com/blevesearch/bleve/v2/config"
)
// Command-line flags selecting which integration tests run and how the
// indexes backing them are built.
var (
	// dataset restricts the run to test directories whose name matches
	// this regular expression; empty means no restriction.
	dataset = flag.String("dataset", "", "only test datasets matching this regex")
	// onlynum restricts each dataset to a single search number; the
	// default of -1 runs every search.
	onlynum = flag.Int("testnum", -1, "only run the test with this number")
	// keepIndex leaves the generated index directories on disk.
	keepIndex = flag.Bool("keepIndex", false, "keep the index after testing")
	// indexType and kvType choose the index implementation and KV store.
	indexType = flag.String("indexType", bleve.Config.DefaultIndexType, "index type to build")
	kvType    = flag.String("kvType", bleve.Config.DefaultKVStore, "kv store type to build")
	// segType and segVer, when set, are passed to the index configuration
	// as forceSegmentType / forceSegmentVersion.
	segType = flag.String("segType", "", "force scorch segment type")
	segVer  = flag.Int("segVer", 0, "force scorch segment version")
)
// TestIntegration runs the search tests found in every sub-directory of
// "tests". The -dataset flag, when set, limits the run to entries whose name
// matches the given regular expression.
func TestIntegration(t *testing.T) {
	flag.Parse()

	t.Logf("using index type %s and kv type %s", *indexType, *kvType)
	if *segType != "" {
		t.Logf("forcing segment type: %s", *segType)
	}
	if *segVer != 0 {
		t.Logf("forcing segment version: %d", *segVer)
	}

	var err error
	var datasetRegexp *regexp.Regexp
	if *dataset != "" {
		datasetRegexp, err = regexp.Compile(*dataset)
		if err != nil {
			t.Fatal(err)
		}
	}

	entries, err := os.ReadDir("tests")
	if err != nil {
		t.Fatal(err)
	}
	for _, f := range entries {
		if datasetRegexp != nil && !datasetRegexp.MatchString(f.Name()) {
			continue
		}
		// Only directories hold test datasets; stray files are ignored.
		if !f.IsDir() {
			continue
		}
		t.Logf("Running test: %s", f.Name())
		runTestDir(t, filepath.Join("tests", f.Name()), f.Name())
	}
}
// runTestDir executes the searches listed in dir/searches.json against an
// index built with dir/mapping.json and compares each response with the
// expected result recorded next to the search.
//
// The index is loaded from dir/data (a single index named datasetName) or,
// when that does not exist, from dir/datasets (an alias over one index per
// sub-directory). Setup problems and mismatches are reported on t. The
// -testnum flag, when non-negative, limits the run to that search number.
func runTestDir(t *testing.T, dir, datasetName string) {
	// read the mapping
	mappingBytes, err := os.ReadFile(filepath.Join(dir, "mapping.json"))
	if err != nil {
		t.Errorf("error reading mapping: %v", err)
		return
	}
	var im mapping.IndexMappingImpl
	if err = json.Unmarshal(mappingBytes, &im); err != nil {
		t.Errorf("error unmarshalling mapping: %v", err)
		return
	}

	var index bleve.Index
	var cleanup func()
	dataDir := filepath.Join(dir, "data")
	datasetsDir := filepath.Join(dir, "datasets")
	_, dataErr := os.Stat(dataDir)
	_, datasetsErr := os.Stat(datasetsDir)
	switch {
	case !os.IsNotExist(dataErr):
		// if there is a dir named 'data' open single index
		index, cleanup, err = loadDataSet(t, datasetName, im, dataDir)
	case !os.IsNotExist(datasetsErr):
		// if there is a dir named 'datasets' build alias over each index
		index, cleanup, err = loadDataSets(t, datasetName, im, datasetsDir)
	default:
		// Without either directory there is no index to search; report it
		// here instead of dereferencing a nil index below.
		t.Errorf("no 'data' or 'datasets' directory found in %s", dir)
		return
	}
	if err != nil {
		t.Errorf("error loading dataset: %v", err)
		return
	}
	defer cleanup()

	// read the searches
	searchBytes, err := os.ReadFile(filepath.Join(dir, "searches.json"))
	if err != nil {
		t.Errorf("error reading searches: %v", err)
		return
	}
	var searches SearchTests
	if err = json.Unmarshal(searchBytes, &searches); err != nil {
		t.Errorf("error unmarshalling searches: %v", err)
		return
	}

	_, isAlias := index.(bleve.IndexAlias)

	// run the searches
	for testNum, st := range searches {
		// A non-negative -testnum selects exactly one search, including
		// search number 0.
		if *onlynum >= 0 && testNum != *onlynum {
			continue
		}
		if st == nil || st.Result == nil {
			t.Errorf("test %d - missing search or expected result", testNum)
			continue
		}
		res, err := index.Search(st.Search)
		if err != nil {
			t.Errorf("error running search: %v", err)
			// res is not usable after a failed search.
			continue
		}
		if res.Total != st.Result.Total {
			t.Errorf("test error - %s", st.Comment)
			t.Errorf("test %d - expected total: %d got %d", testNum, st.Result.Total, res.Total)
			continue
		}
		if len(res.Hits) != len(st.Result.Hits) {
			t.Errorf("test error - %s", st.Comment)
			t.Errorf("test %d - expected hits len: %d got %d", testNum, len(st.Result.Hits), len(res.Hits))
			t.Errorf("got hits: %v", res.Hits)
			continue
		}
		for hi, want := range st.Result.Hits {
			got := res.Hits[hi]
			if want.ID != got.ID {
				t.Errorf("test error - %s", st.Comment)
				t.Errorf("test %d - expected hit %d to have ID %s got %s", testNum, hi, want.ID, got.ID)
			}
			// Fields, fragments and locations are only compared when the
			// expected hit specifies them.
			if want.Fields != nil && !reflect.DeepEqual(want.Fields, got.Fields) {
				t.Errorf("test error - %s", st.Comment)
				t.Errorf("test %d - expected hit %d to have fields %#v got %#v", testNum, hi, want.Fields, got.Fields)
			}
			if want.Fragments != nil && !reflect.DeepEqual(want.Fragments, got.Fragments) {
				t.Errorf("test error - %s", st.Comment)
				t.Errorf("test %d - expected hit %d to have fragments %#v got %#v", testNum, hi, want.Fragments, got.Fragments)
			}
			if want.Locations != nil && !reflect.DeepEqual(want.Locations, got.Locations) {
				t.Errorf("test error - %s", st.Comment)
				t.Errorf("test %d - expected hit %d to have locations %#v got %#v", testNum, hi, want.Locations, got.Locations)
			}
			// assert that none of the scores were NaN,+Inf,-Inf
			if math.IsInf(got.Score, 0) || math.IsNaN(got.Score) {
				t.Errorf("test error - %s", st.Comment)
				t.Errorf("test %d - invalid score %f", testNum, got.Score)
			}
		}
		if st.Result.Facets != nil && !reflect.DeepEqual(st.Result.Facets, res.Facets) {
			t.Errorf("test error - %s", st.Comment)
			t.Errorf("test %d - expected facets: %#v got %#v", testNum, st.Result.Facets, res.Facets)
		}
		if !isAlias {
			// check that custom index name is in results
			for _, hit := range res.Hits {
				if hit.Index != datasetName {
					t.Fatalf("expected name: %s, got: %s", datasetName, hit.Index)
				}
			}
		}
	}
}
// loadDataSet creates a new index at "test-<datasetName>.bleve" using the
// given mapping and the -indexType / -kvType / -segType / -segVer flags,
// names it datasetName, and indexes every file in path as a JSON document
// whose ID is the file name without its extension.
//
// On success it returns the index and a cleanup function that closes the
// index and, unless -keepIndex is set, removes it from disk; cleanup
// problems are reported on t. If loading fails after the index has been
// created, the same cleanup is run before the error is returned so that no
// open index is left behind.
func loadDataSet(t *testing.T, datasetName string, mapping mapping.IndexMappingImpl, path string) (bleve.Index, func(), error) {
	idxPath := fmt.Sprintf("test-%s.bleve", datasetName)

	cfg := map[string]interface{}{}
	if *segType != "" {
		cfg["forceSegmentType"] = *segType
	}
	if *segVer != 0 {
		cfg["forceSegmentVersion"] = *segVer
	}

	index, err := bleve.NewUsing(idxPath, &mapping, *indexType, *kvType, cfg)
	if err != nil {
		return nil, nil, fmt.Errorf("error creating new index: %w", err)
	}

	cleanup := func() {
		// Errorf rather than Fatalf: a failed close must not prevent the
		// index directory from being removed.
		if err := index.Close(); err != nil {
			t.Errorf("error closing index: %v", err)
		}
		if !*keepIndex {
			if err := os.RemoveAll(idxPath); err != nil {
				t.Errorf("error removing index: %v", err)
			}
		}
	}

	// set a custom index name
	index.SetName(datasetName)

	// index data
	indexFiles := func() error {
		entries, err := os.ReadDir(path)
		if err != nil {
			return fmt.Errorf("error reading data dir: %w", err)
		}
		for _, f := range entries {
			filename := f.Name()
			fileBytes, err := os.ReadFile(filepath.Join(path, filename))
			if err != nil {
				return fmt.Errorf("error reading data file: %w", err)
			}
			var fileDoc interface{}
			if err := json.Unmarshal(fileBytes, &fileDoc); err != nil {
				return fmt.Errorf("error parsing data file as json: %w", err)
			}
			// The document ID is the file name with its extension removed.
			id := filename[:len(filename)-len(filepath.Ext(filename))]
			if err := index.Index(id, fileDoc); err != nil {
				return fmt.Errorf("error indexing data: %w", err)
			}
		}
		return nil
	}
	if err := indexFiles(); err != nil {
		cleanup()
		return nil, nil, err
	}

	return index, cleanup, nil
}
// loadDataSets builds one index per entry of path via loadDataSet and
// returns an index alias named datasetName over all of them, together with a
// cleanup function that runs every per-index cleanup in creation order.
//
// If any dataset fails to load, the indexes created so far are cleaned up
// before the error is returned.
func loadDataSets(t *testing.T, datasetName string, mapping mapping.IndexMappingImpl, path string) (bleve.Index, func(), error) {
	entries, err := os.ReadDir(path)
	if err != nil {
		return nil, nil, fmt.Errorf("error reading datasets dir: %w", err)
	}

	var cleanups []func()
	// cleanupAll reads cleanups when it is called, so it covers every
	// index registered up to that point.
	cleanupAll := func() {
		for _, cleanup := range cleanups {
			cleanup()
		}
	}

	alias := bleve.NewIndexAlias()
	for _, f := range entries {
		idx, idxCleanup, err := loadDataSet(t, f.Name(), mapping, filepath.Join(path, f.Name()))
		if err != nil {
			cleanupAll()
			return nil, nil, fmt.Errorf("error loading dataset: %w", err)
		}
		cleanups = append(cleanups, idxCleanup)
		alias.Add(idx)
	}
	alias.SetName(datasetName)

	return alias, cleanupAll, nil
}
================================================
FILE: test/ip_field_test.go
================================================
// Copyright (c) 2021 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"net"
"testing"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/mapping"
)
// doc is the document shape indexed by the IP field tests: a single IP
// address, in textual form, stored under the JSON key "ip".
type doc struct {
	IP string `json:"ip"`
}
// createIdx returns a new in-memory index whose default document mapping is
// a static mapping with one IP field at "ip", using the "standard" default
// analyzer. The test is failed immediately if the index cannot be created.
func createIdx(t *testing.T) bleve.Index {
	ipField := mapping.NewIPFieldMapping()
	ipField.Name = "ip"

	docMapping := bleve.NewDocumentStaticMapping()
	docMapping.AddFieldMappingsAt("ip", ipField)

	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultAnalyzer = "standard"
	indexMapping.DefaultMapping = docMapping

	index, err := bleve.NewMemOnly(indexMapping)
	if err != nil {
		t.Fatal(err)
	}
	return index
}
// Test_ipv4CidrQuery indexes a single IPv4 address and verifies that an IP
// range query for the enclosing /24 CIDR block returns exactly that document.
func Test_ipv4CidrQuery(t *testing.T) {
	idx := createIdx(t)
	defer idx.Close()

	err := idx.Index("id1", doc{"192.168.1.21"})
	if err != nil {
		t.Fatal(err)
	}

	reqStr := `192.168.1.0/24`
	query := bleve.NewIPRangeQuery(reqStr)
	query.FieldVal = "ip"
	search := bleve.NewSearchRequest(query)
	res, err := idx.Search(search)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 1 {
		t.Fatalf("failed to find %q, res -> %s", reqStr, res)
	}
	if res.Hits[0].ID != "id1" {
		// Report the document ID that was actually returned.
		t.Fatalf("expected %q got %q", "id1", res.Hits[0].ID)
	}
}
// Test_ipv6CidrQuery indexes a single IPv6 address and verifies that an IP
// range query for an /80 CIDR block covering it returns exactly that document.
func Test_ipv6CidrQuery(t *testing.T) {
	idx := createIdx(t)
	defer idx.Close()

	err := idx.Index("id1", doc{"2a00:23c8:7283:ff00:1fa8:2af6:9dec:6b19"})
	if err != nil {
		t.Fatal(err)
	}

	reqStr := `2a00:23c8:7283:ff00:1fa8:0:0:0/80`
	query := bleve.NewIPRangeQuery(reqStr)
	query.FieldVal = "ip"
	search := bleve.NewSearchRequest(query)
	res, err := idx.Search(search)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 1 {
		t.Fatalf("failed to find %q, res -> %s", reqStr, res)
	}
	if res.Hits[0].ID != "id1" {
		// Report the document ID that was actually returned.
		t.Fatalf("expected %q got %q", "id1", res.Hits[0].ID)
	}
}
// Test_MultiIPvr4CidrQuery indexes three IPv4 addresses, two of which sit at
// the first and last address of 192.168.1.0/24, and verifies that an IP range
// query for that CIDR block returns those two documents and not the third.
func Test_MultiIPvr4CidrQuery(t *testing.T) {
	idx := createIdx(t)
	defer idx.Close()

	err := idx.Index("id1", doc{"192.168.1.0"})
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Index("id2", doc{"192.168.1.255"})
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Index("id3", doc{"192.168.2.22"})
	if err != nil {
		t.Fatal(err)
	}

	reqStr := `192.168.1.0/24`
	query := bleve.NewIPRangeQuery(reqStr)
	query.FieldVal = "ip"
	search := bleve.NewSearchRequest(query)
	res, err := idx.Search(search)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 2 {
		t.Fatalf("failed to find %q, res -> %s", reqStr, res)
	}
	if res.Hits[0].ID != "id1" {
		t.Fatalf("expected %q got %q", "id1", res.Hits[0].ID)
	}
	if res.Hits[1].ID != "id2" {
		// Report the ID of the hit being checked (the second one).
		t.Fatalf("expected %q got %q", "id2", res.Hits[1].ID)
	}
}
// Test_CidrQueryNonDivisibleBy8 verifies IP range queries whose prefix length
// is not a multiple of eight: of four indexed addresses, only the two inside
// 192.168.1.0/30 are expected to be returned.
func Test_CidrQueryNonDivisibleBy8(t *testing.T) {
	idx := createIdx(t)
	defer idx.Close()

	err := idx.Index("id1", doc{"192.168.1.1"})
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Index("id2", doc{"192.168.1.2"})
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Index("id3", doc{"192.168.2.5"})
	if err != nil {
		t.Fatal(err)
	}
	err = idx.Index("id4", doc{"192.168.2.6"})
	if err != nil {
		t.Fatal(err)
	}

	reqStr := `192.168.1.0/30`
	query := bleve.NewIPRangeQuery(reqStr)
	query.FieldVal = "ip"
	search := bleve.NewSearchRequest(query)
	res, err := idx.Search(search)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 2 {
		t.Fatalf("failed to find %q, res -> %s", reqStr, res)
	}
	if res.Hits[0].ID != "id1" {
		t.Fatalf("expected %q got %q", "id1", res.Hits[0].ID)
	}
	if res.Hits[1].ID != "id2" {
		// Report the ID of the hit being checked (the second one).
		t.Fatalf("expected %q got %q", "id2", res.Hits[1].ID)
	}
}
// Test_simpleIPv4MatchQuery verifies that an IP range query given a single
// address (no CIDR suffix) matches the document indexed with that address.
func Test_simpleIPv4MatchQuery(t *testing.T) {
	idx := createIdx(t)
	defer idx.Close()

	err := idx.Index("id1", doc{"192.168.1.21"})
	if err != nil {
		t.Fatal(err)
	}

	reqStr := `192.168.1.21`
	query := bleve.NewIPRangeQuery(reqStr)
	query.FieldVal = "ip"
	search := bleve.NewSearchRequest(query)
	res, err := idx.Search(search)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 1 {
		t.Fatalf("failed to find %q, res -> %s", reqStr, res)
	}
	if res.Hits[0].ID != "id1" {
		// Report the document ID that was actually returned.
		t.Fatalf("expected %q got %q", "id1", res.Hits[0].ID)
	}
}
// Test_ipv4LiteralData verifies that a document whose "ip" field is a net.IP
// value (rather than a string) is indexed and can be found by an IP range
// query for the enclosing /24 CIDR block.
func Test_ipv4LiteralData(t *testing.T) {
	idx := createIdx(t)
	defer idx.Close()

	// stronglyTyped carries the address as net.IP instead of a string.
	type stronglyTyped struct {
		IP net.IP `json:"ip"`
	}

	err := idx.Index("id1", stronglyTyped{net.ParseIP("192.168.1.21")})
	if err != nil {
		t.Fatal(err)
	}

	reqStr := `192.168.1.0/24`
	query := bleve.NewIPRangeQuery(reqStr)
	query.FieldVal = "ip"
	search := bleve.NewSearchRequest(query)
	res, err := idx.Search(search)
	if err != nil {
		t.Fatal(err)
	}
	if res.Total != 1 {
		t.Fatalf("failed to find %q, res -> %s", reqStr, res)
	}
	if res.Hits[0].ID != "id1" {
		// Report the document ID that was actually returned.
		t.Fatalf("expected %q got %q", "id1", res.Hits[0].ID)
	}
}
// Test_badIPFmt checks that searching with an IP range query built from a
// malformed address yields an error.
func Test_badIPFmt(t *testing.T) {
	index := createIdx(t)
	defer index.Close()

	const malformed = `192.168.1.`

	ipQuery := bleve.NewIPRangeQuery(malformed)
	ipQuery.FieldVal = "ip"

	if _, searchErr := index.Search(bleve.NewSearchRequest(ipQuery)); searchErr == nil {
		t.Errorf("%q is not a valid IP", malformed)
	}
}
// Test_badCIDRFmt checks that an IP range query built from a malformed CIDR
// string is rejected both by Validate and when it is executed as a search.
func Test_badCIDRFmt(t *testing.T) {
	index := createIdx(t)
	defer index.Close()

	const malformed = `/`

	cidrQuery := bleve.NewIPRangeQuery(malformed)
	cidrQuery.FieldVal = "ip"

	// The query must fail validation on its own...
	if validateErr := cidrQuery.Validate(); validateErr == nil {
		t.Errorf("%q is not a valid CIDR", malformed)
	}

	// ...and must also be rejected when run against the index.
	if _, searchErr := index.Search(bleve.NewSearchRequest(cidrQuery)); searchErr == nil {
		t.Errorf("%q is not a valid CIDR", malformed)
	}
}
================================================
FILE: test/tests/alias/datasets/shard0/a.json
================================================
{
"name": "a"
}
================================================
FILE: test/tests/alias/datasets/shard0/c.json
================================================
{
"name": "c"
}
================================================
FILE: test/tests/alias/datasets/shard1/b.json
================================================
{
"name": "b"
}
================================================
FILE: test/tests/alias/datasets/shard1/d.json
================================================
{
"name": "d"
}
================================================
FILE: test/tests/alias/mapping.json
================================================
{
"default_analyzer": "keyword"
}
================================================
FILE: test/tests/alias/searches.json
================================================
[
{
"comment": "match all across shards",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"match_all": {}
}
},
"result": {
"total_hits": 4,
"hits": [
{
"id": "a"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "search after b (page 2 when size=2)",
"search": {
"from": 0,
"size": 2,
"sort": ["name"],
"search_after": ["b"],
"query": {
"match_all": {}
}
},
"result": {
"total_hits": 4,
"hits": [
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "search before c (page 1 when size=2)",
"search": {
"from": 0,
"size": 2,
"sort": ["name"],
"search_before": ["c"],
"query": {
"match_all": {}
}
},
"result": {
"total_hits": 4,
"hits": [
{
"id": "a"
},
{
"id": "b"
}
]
}
}
]
================================================
FILE: test/tests/basic/data/a.json
================================================
{
"id": "a",
"name": "marty",
"age": 19,
"title": "mista",
"tags": ["gopher", "belieber"]
}
================================================
FILE: test/tests/basic/data/b.json
================================================
{
"id": "b",
"name": "steve has long & complicated name",
"age": 27,
"birthday": "2001-09-09T01:46:40Z",
"title": "missess"
}
================================================
FILE: test/tests/basic/data/c.json
================================================
{
"id": "c",
"name": "bob walks home",
"age": 64,
"birthday": "2014-05-13T16:53:20Z",
"title": "masta"
}
================================================
FILE: test/tests/basic/data/d.json
================================================
{
"id": "d",
"name": "bobbleheaded wings top the phone",
"age": 72,
"birthday": "2014-05-13T16:53:20Z",
"title": "mizz"
}
================================================
FILE: test/tests/basic/mapping.json
================================================
{
"types": {
"person": {
"properties": {
"name": {
"fields": [
{
"include_term_vectors": true,
"include_in_all": true,
"index": true,
"store": true,
"analyzer": "en",
"type": "text"
}
],
"dynamic": true,
"enabled": true
},
"id": {
"dynamic": false,
"enabled": false
}
}
}
},
"default_type": "person"
}
================================================
FILE: test/tests/basic/searches.json
================================================
[
{
"comment": "test term search, exact match",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"term": "marti"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test term search, no match",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"term": "noone"
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "test match phrase search",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"match_phrase": "steve has"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"comment": "test term search, no match",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"term": "walking"
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "test match search, matching due to analysis",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"fuzziness": 0,
"prefix_length": 0,
"field": "name",
"match": "walking"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "c"
}
]
}
},
{
"comment": "test term prefix search",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"prefix": "bobble"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "d"
}
]
}
},
{
"comment": "test simple query string",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"query": "+name:phone"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "d"
}
]
}
},
{
"comment": "test numeric range, no lower bound",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "age",
"max": 30
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "a"
},
{
"id": "b"
}
]
}
},
{
"comment": "test numeric range, upper and lower bounds",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "age",
"max": 30,
"min": 20
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"comment": "test conjunction of numeric range, upper and lower bounds",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"conjuncts": [
{
"boost": 1,
"field": "age",
"min": 20
},
{
"boost": 1,
"field": "age",
"max": 30
}
]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"comment": "test date range, no upper bound",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "birthday",
"start": "2010-01-01"
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "test numeric range, no lower bound",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "birthday",
"end": "2010-01-01"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"comment": "test term search, matching inside an array",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "tags",
"term": "gopher"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test term search, matching another element inside array",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "tags",
"term": "belieber"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test term search, not present in array",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "tags",
"term": "notintagsarray"
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "with size 0, total should be 1, but hits empty",
"search": {
"from": 0,
"size": 0,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"term": "marti"
}
},
"result": {
"total_hits": 1,
"hits": []
}
},
{
"comment": "a search for doc a that includes tags field, verifies both values come back",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"fields": ["tags"],
"query": {
"field": "name",
"term": "marti"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a",
"fields": {
"tags": ["gopher", "belieber"]
}
}
]
}
},
{
"comment": "test fuzzy search, fuzziness 1 with match",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"term": "msrti",
"fuzziness": 1
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "highlight results",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"match": "long"
},
"highlight": {
"fields": ["name"]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b",
"fragments": {
"name": ["steve has <a> long & complicated name"]
}
}
]
}
},
{
"comment": "highlight results without specifying fields",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"match": "long"
},
"highlight": {}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b",
"fragments": {
"name": ["steve has <a> long & complicated name"]
}
}
]
}
},
{
"comment": "request fields",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"fields": ["age","birthday"],
"query": {
"field": "name",
"match": "long"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b",
"fields": {
"age": 27,
"birthday": "2001-09-09T01:46:40Z"
}
}
]
}
},
{
"comment": "tests query string only containing MUST NOT clause, bug #193",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"query": "-title:mista"
}
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "highlight results including non-matching field (which should be produced in its entirety, though unhighlighted)",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"match": "long"
},
"highlight": {
"fields": ["name", "title"]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b",
"fragments": {
"name": ["steve has <a> long & complicated name"],
"title": ["missess"]
}
}
]
}
},
{
"comment": "search and highlight an array field",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "tags",
"match": "gopher"
},
"highlight": {
"fields": ["tags"]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a",
"fragments": {
"tags": ["gopher"]
}
}
]
}
},
{
"comment": "reproduce bug in prefix search",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "title",
"prefix": "miss"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"comment": "test match none",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"match_none": {}
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "test match all",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"match_all": {}
}
},
"result": {
"total_hits": 4,
"hits": [
{
"id": "a"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "test doc id query",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"ids": ["b", "c"]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "c"
}
]
}
},
{
"comment": "test query string MUST and SHOULD",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"query": "+age:>20 missess"
}
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "test regexp matching term",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"regexp": "mar.*"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test regexp that should not match when properly anchored",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"regexp": "mar."
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "test wildcard matching term",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "name",
"wildcard": "mar*"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test boost - term query",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"field": "name",
"term": "steve",
"boost": 5.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test boost - term query",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"fuzziness": 1,
"field": "name",
"term": "steve",
"boost": 5.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test boost - numeric range query",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"field": "age",
"min": 25,
"max": 29,
"boost": 50.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test boost - regexp query",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"disjuncts": [
{
"field": "name",
"term": "marti",
"boost": 1.0
},
{
"field": "name",
"regexp": "stev.*",
"boost": 5.0
}
]
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "test wildcard inside query string",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"query": "name:mar*"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test regexp inside query string",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"query": "name:/mar.*/"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"comment": "test term range",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "title",
"max": "miz",
"min": "mis"
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "a"
},
{
"id": "b"
}
]
}
}
]
================================================
FILE: test/tests/employee/data/emp10508560.json
================================================
{
"salary": 104561.8,
"_type": "emp",
"name": "Deirdre Reed",
"mutated": 0,
"is_manager": true,
"dept": "Accounts",
"join_date": "2003-05-28T21:29:00",
"manages": {
"team_size": 9,
"reports": [
"Gallia Julián",
"Duvessa Nicolás",
"Beryl Thomas",
"Deirdre Julián",
"Antonia Gerónimo",
"Ciara Young",
"Riona Richardson IX",
"Severin Jr.",
"Perdita Morgan"
]
},
"languages_known": [
"English",
"Spanish",
"German",
"Italian",
"French",
"Arabic",
"Africans",
"Hindi",
"Vietnamese",
"Urdu",
"Dutch",
"Quechua",
"Japanese",
"Chinese",
"Nepalese",
"Thai",
"Malay"
],
"emp_id": "10508560",
"email": "deirdre@mcdiabetes.com"
}
================================================
FILE: test/tests/employee/mapping.json
================================================
{}
================================================
FILE: test/tests/employee/searches.json
================================================
[
{
"comment": "test array position output",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "manages.reports",
"term": "julián"
},
"includeLocations": true
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "emp10508560",
"locations": {
"manages.reports": {
"julián": [
{
"pos": 2,
"start": 7,
"end": 14,
"array_positions":[0]
},
{
"pos": 2,
"start": 8,
"end": 15,
"array_positions":[3]
}
]
}
}
}
]
}
}
]
================================================
FILE: test/tests/facet/data/a.json
================================================
{
"category": "inventory",
"type": "book",
"rating": 2,
"updated": "2014-11-25"
}
================================================
FILE: test/tests/facet/data/b.json
================================================
{
"category": "inventory",
"type": "book",
"rating": 7,
"updated": "2013-07-25"
}
================================================
FILE: test/tests/facet/data/c.json
================================================
{
"category": "inventory",
"type": "book",
"rating": 1,
"updated": "2014-03-03"
}
================================================
FILE: test/tests/facet/data/d.json
================================================
{
"category": "inventory",
"type": "book",
"rating": 9,
"updated": "2014-09-16"
}
================================================
FILE: test/tests/facet/data/e.json
================================================
{
"category": "inventory",
"type": "book",
"rating": 5,
"updated": "2014-11-15"
}
================================================
FILE: test/tests/facet/data/f.json
================================================
{
"category": "inventory",
"type": "movie",
"rating": 3,
"updated": "2017-06-05"
}
================================================
FILE: test/tests/facet/data/g.json
================================================
{
"category": "inventory",
"type": "movie",
"rating": 9,
"updated": "2011-10-03"
}
================================================
FILE: test/tests/facet/data/h.json
================================================
{
"category": "inventory",
"type": "movie",
"rating": 9,
"updated": "2019-08-26"
}
================================================
FILE: test/tests/facet/data/i.json
================================================
{
"category": "inventory",
"type": "movie",
"rating": 1,
"updated": "2014-12-14"
}
================================================
FILE: test/tests/facet/data/j.json
================================================
{
"category": "inventory",
"type": "game",
"rating": 9,
"updated": "2013-10-20"
}
================================================
FILE: test/tests/facet/mapping.json
================================================
{}
================================================
FILE: test/tests/facet/searches.json
================================================
[
{
"search": {
"from": 0,
"size": 0,
"query": {
"field": "category",
"term": "inventory"
},
"facets": {
"types": {
"size": 3,
"field": "type"
}
}
},
"result": {
"total_hits": 10,
"hits": [],
"facets": {
"types": {
"field": "type",
"total": 10,
"missing": 0,
"other": 0,
"terms": [
{
"term": "book",
"count": 5
},
{
"term": "movie",
"count": 4
},
{
"term": "game",
"count": 1
}
]
}
}
}
},
{
"search": {
"from": 0,
"size": 0,
"query": {
"field": "category",
"term": "inventory"
},
"facets": {
"types": {
"size": 3,
"field": "rating",
"numeric_ranges": [
{
"name": "low",
"max": 5
},
{
"name": "high",
"min": 5
}
]
}
}
},
"result": {
"total_hits": 10,
"hits": [],
"facets": {
"types": {
"field": "rating",
"total": 10,
"missing": 0,
"other": 0,
"numeric_ranges": [
{
"name": "high",
"count": 6,
"min": 5
},
{
"name": "low",
"count": 4,
"max": 5
}
]
}
}
}
},
{
"search": {
"from": 0,
"size": 0,
"query": {
"field": "category",
"term": "inventory"
},
"facets": {
"types": {
"size": 3,
"field": "updated",
"date_ranges": [
{
"name": "old",
"end": "2012-01-01"
},
{
"name": "new",
"start": "2012-01-01"
}
]
}
}
},
"result": {
"total_hits": 10,
"hits": [],
"facets": {
"types": {
"field": "updated",
"total": 10,
"missing": 0,
"other": 0,
"date_ranges": [
{
"name": "new",
"count": 9,
"start": "2012-01-01T00:00:00Z"
},
{
"name": "old",
"count": 1,
"end": "2012-01-01T00:00:00Z"
}
]
}
}
}
}
]
================================================
FILE: test/tests/fosdem/data/3311@FOSDEM15@fosdem.org.json
================================================
{
"description": "From Prolog to Erlang to Haskell to Lisp to TLC and then back to Prolog I have journeyed, and I'd like to share some of the beautiful",
"category": "Word"
}
================================================
FILE: test/tests/fosdem/data/3492@FOSDEM15@fosdem.org.json
================================================
{
"description": "different cats",
"category": "Perl"
}
================================================
FILE: test/tests/fosdem/data/3496@FOSDEM15@fosdem.org.json
================================================
{
"description": "many cats",
"category": "Perl"
}
================================================
FILE: test/tests/fosdem/data/3505@FOSDEM15@fosdem.org.json
================================================
{
"description": "From Prolog to Erlang to Haskell to Lisp to TLC and then back to Prolog I have journeyed, and I'd like to share some of the beautiful",
"category": "Perl"
}
================================================
FILE: test/tests/fosdem/data/3507@FOSDEM15@fosdem.org.json
================================================
{
"description": "From Prolog to Erlang to Haskell to Gel to TLC and then back to Prolog I have journeyed, and I'd like to share some of the beautiful",
"category": "Perl"
}
================================================
FILE: test/tests/fosdem/mapping.json
================================================
{
"default_mapping": {
"enabled": true,
"dynamic": true,
"properties": {
"category": {
"enabled": true,
"dynamic": true,
"fields": [
{
"type": "text",
"analyzer": "keyword",
"store": true,
"index": true,
"include_term_vectors": true,
"include_in_all": true
}
],
"default_analyzer": ""
},
"description": {
"enabled": true,
"dynamic": true,
"fields": [
{
"type": "text",
"analyzer": "en",
"store": true,
"index": true,
"include_term_vectors": true,
"include_in_all": true
}
],
"default_analyzer": ""
},
"summary": {
"enabled": true,
"dynamic": true,
"fields": [
{
"type": "text",
"analyzer": "en",
"store": true,
"index": true,
"include_term_vectors": true,
"include_in_all": true
}
],
"default_analyzer": ""
},
"url": {
"enabled": true,
"dynamic": true,
"fields": [
{
"type": "text",
"analyzer": "keyword",
"store": true,
"index": true,
"include_term_vectors": true,
"include_in_all": true
}
],
"default_analyzer": ""
}
},
"default_analyzer": ""
},
"type_field": "_type",
"default_type": "_default",
"default_analyzer": "en",
"default_datetime_parser": "dateTimeOptional",
"default_field": "_all",
"byte_array_converter": "json",
"analysis": {}
}
================================================
FILE: test/tests/fosdem/searches.json
================================================
[
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "category",
"match_phrase": "Perl"
}
},
"result": {
"total_hits": 4,
"hits": [
{
"id": "3492@FOSDEM15@fosdem.org"
},
{
"id": "3496@FOSDEM15@fosdem.org"
},
{
"id": "3505@FOSDEM15@fosdem.org"
},
{
"id": "3507@FOSDEM15@fosdem.org"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"match": "lisp"
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "3311@FOSDEM15@fosdem.org"
},
{
"id": "3505@FOSDEM15@fosdem.org"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {"boost":1,"query":"+lisp +category:Perl"}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "3505@FOSDEM15@fosdem.org"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {"boost":1,"query":"+lisp +category:\"Perl\""}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "3505@FOSDEM15@fosdem.org"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"must": {
"conjuncts":[
{"boost":1,"query":"+cats"},
{"field":"category","match_phrase":"Perl"}
]
}
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "3492@FOSDEM15@fosdem.org"
},
{
"id": "3496@FOSDEM15@fosdem.org"
}
]
}
}
]
================================================
FILE: test/tests/geo/data/amoeba_brewery.json
================================================
{"name":"amoeba brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.97467,"lon":77.60490}}
================================================
FILE: test/tests/geo/data/brewpub_on_the_green.json
================================================
{"name":"Brewpub-on-the-Green","city":"Fremont","state":"California","code":"","country":"United States","phone":"","website":"","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":[],"geo":{"accuracy":"APPROXIMATE","lat":37.5483,"lon":-121.989}}
================================================
FILE: test/tests/geo/data/capital_city_brewing_company.json
================================================
{"name":"Capital City Brewing Company","city":"Washington","state":"District of Columbia","code":"20005","country":"United States","phone":"202.628.2222","website":"http://www.capcitybrew.com","type":"brewery","updated":"2010-07-22 20:00:20","description":"Washington DC's first brewpub since prohibition, Capitol City Brewing Co. opened its doors in 1992. Our first location still stands in Downtown DC, at 11th and H St., NW. Our company policy is to bring the fine craft of brewing to every person who lives and visits our region, as well as treating them to a wonderful meal and a great experience.","address":["1100 New York Ave, NW"],"geo":{"accuracy":"ROOFTOP","lat":38.8999,"lon":-77.0272}}
================================================
FILE: test/tests/geo/data/communiti_brewery.json
================================================
{"name":"communiti brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.97237,"lon":77.608237}}
================================================
FILE: test/tests/geo/data/firehouse_grill_brewery.json
================================================
{"name":"Firehouse Grill & Brewery","city":"Sunnyvale","state":"California","code":"94086","country":"United States","phone":"1-408-773-9500","website":"","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":["111 South Murphy Avenue"],"geo":{"accuracy":"RANGE_INTERPOLATED","lat":37.3775,"lon":-122.03}}
================================================
FILE: test/tests/geo/data/hook_ladder_brewing_company.json
================================================
{"name":"Hook & Ladder Brewing Company","city":"Silver Spring","state":"Maryland","code":"20910","country":"United States","phone":"301.565.4522","website":"http://www.hookandladderbeer.com","type":"brewery","updated":"2010-07-22 20:00:20","description":"At Hook & Ladder Brewing we believe in great beer in the company of good friends, so we bring you three great beers for your drinking pleasure (please drink responsibly). Each of our beers is carefully crafted with the finest quality ingredients for a distinctive taste we know you will enjoy. Try one tonight, you just might get hooked. Through our own experiences in the fire and rescue service we have chosen the Hook & Ladder as a symbol of pride and honor to pay tribute to the brave men and women who serve and protect our communities.","address":["8113 Fenton St."],"geo":{"accuracy":"ROOFTOP","lat":38.9911,"lon":-77.0237}}
================================================
FILE: test/tests/geo/data/jack_s_brewing.json
================================================
{"name":"Jack's Brewing","city":"Fremont","state":"California","code":"94538","country":"United States","phone":"1-510-796-2036","website":"","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":["39176 Argonaut Way"],"geo":{"accuracy":"ROOFTOP","lat":37.5441,"lon":-121.988}}
================================================
FILE: test/tests/geo/data/social_brewery.json
================================================
{"name":"social brewery","city":"bangalore","state":"KAR","code":"","country":"India","phone":"","website":"","type":"brewery","updated":"2019-09-17 20:00:20","description":"brewery near cb office, but outside the polygon","address":[],"geo":{"accuracy":"APPROXIMATE","lat":12.9736946,"lon":77.6042133}}
================================================
FILE: test/tests/geo/data/sweet_water_tavern_and_brewery.json
================================================
{"name":"Sweet Water Tavern and Brewery","city":"Sterling","state":"Virginia","code":"20121","country":"United States","phone":"(703) 449-1108","website":"http://www.greatamericanrestaurants.com/sweetMainSter/index.htm","type":"brewery","updated":"2010-07-22 20:00:20","description":"","address":["45980 Waterview Plaza"],"geo":{"accuracy":"RANGE_INTERPOLATED","lat":39.0324,"lon":-77.4097}}
================================================
FILE: test/tests/geo/mapping.json
================================================
{
"types": {
"brewery": {
"properties": {
"name": {
"fields": [
{
"include_term_vectors": true,
"include_in_all": true,
"index": true,
"store": true,
"analyzer": "keyword",
"type": "text"
}
],
"dynamic": true,
"enabled": true
},
"geo": {
"fields": [
{
"include_term_vectors": true,
"include_in_all": true,
"index": true,
"store": true,
"type": "geopoint"
}
],
"dynamic": true,
"enabled": true
}
}
}
},
"default_type": "brewery"
}
================================================
FILE: test/tests/geo/searches.json
================================================
[
{
"comment": "breweries near the couchbase office",
"search": {
"from": 0,
"size": 10,
"query": {
"location": {
"lon": -122.107799,
"lat": 37.399285
},
"distance": "100mi",
"field": "geo"
},
"sort": [
{
"by": "geo_distance",
"field": "geo",
"unit": "mi",
"location": {
"lon": -122.107799,
"lat": 37.399285
}
}
]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "firehouse_grill_brewery"
},
{
"id": "jack_s_brewing"
},
{
"id": "brewpub_on_the_green"
}
]
}
},
{
"comment": "breweries near the whitehouse",
"search": {
"from": 0,
"size": 10,
"query": {
"location": {
"lon": -77.0365,
"lat": 38.8977
},
"distance": "100mi",
"field": "geo"
},
"sort": [
{
"by": "geo_distance",
"field": "geo",
"unit": "mi",
"location": {
"lon": -77.0365,
"lat": 38.8977
}
}
]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "capital_city_brewing_company"
},
{
"id": "hook_ladder_brewing_company"
},
{
"id": "sweet_water_tavern_and_brewery"
}
]
}
},
{
"comment": "bounding box of USA",
"search": {
"from": 0,
"size": 10,
"query": {
"top_left": {
"lon": -125.0011,
"lat": 49.5904
},
"bottom_right": {
"lon": -66.9326,
"lat": 24.9493
},
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "brewpub_on_the_green"
},
{
"id": "capital_city_brewing_company"
},
{
"id": "firehouse_grill_brewery"
},
{
"id": "hook_ladder_brewing_company"
},
{
"id": "jack_s_brewing"
},
{
"id": "sweet_water_tavern_and_brewery"
}
]
}
},
{
"comment": "bounding box around DC area",
"search": {
"from": 0,
"size": 10,
"query": {
"top_left": {
"lon": -78,
"lat": 39.5
},
"bottom_right": {
"lon": -76,
"lat": 38.5
},
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "capital_city_brewing_company"
},
{
"id": "hook_ladder_brewing_company"
},
{
"id": "sweet_water_tavern_and_brewery"
}
]
}
},
{
"comment": "breweries near the couchbase office, using GeoJSON style points",
"search": {
"from": 0,
"size": 10,
"query": {
"location": [-122.107799,37.399285],
"distance": "100mi",
"field": "geo"
},
"sort": [
{
"by": "geo_distance",
"field": "geo",
"unit": "mi",
"location": [-122.107799,37.399285]
}
]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "firehouse_grill_brewery"
},
{
"id": "jack_s_brewing"
},
{
"id": "brewpub_on_the_green"
}
]
}
},
{
"comment": "bounding box around DC area, using GeoJSON style",
"search": {
"from": 0,
"size": 10,
"query": {
"top_left": [-78,39.5],
"bottom_right": [-76,38.5],
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "capital_city_brewing_company"
},
{
"id": "hook_ladder_brewing_company"
},
{
"id": "sweet_water_tavern_and_brewery"
}
]
}
},
{
"comment": "polygon around cb office area, using GeoJSON lat/lon as array",
"search": {
"from": 0,
"size": 10,
"query": {
"polygon_points": [[77.607749,12.974872],[77.6101101,12.971725],[77.606912,12.972530],[77.603780,12.975112]],
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "amoeba_brewery"
},
{
"id": "communiti_brewery"
}
]
}
},
{
"comment": "polygon around cb office area, using GeoJSON lat/lon as string",
"search": {
"from": 0,
"size": 10,
"query": {
"polygon_points": ["12.974872, 77.607749","12.971725, 77.6101101","12.972530, 77.606912","12.975112, 77.603780"],
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "amoeba_brewery"
},
{
"id": "communiti_brewery"
}
]
}
},
{
"comment": "polygon around cb office area",
"search": {
"from": 0,
"size": 10,
"query": {
"polygon_points": [{"lat":12.974872, "lon":77.607749}, {"lat":12.971725, "lon":77.6101101},
{"lat":12.972530, "lon":77.606912}, {"lat":12.975112, "lon":77.603780}],
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "amoeba_brewery"
},
{
"id": "communiti_brewery"
}
]
}
},
{
"comment": "polygon around cb office area as geohash",
"search": {
"from": 0,
"size": 10,
"query": {
"polygon_points": ["tdr1y40", "tdr1y13", "tdr1vcx", "tdr1vfj"],
"field": "geo"
},
"sort": [
"name"
]
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "amoeba_brewery"
},
{
"id": "communiti_brewery"
}
]
}
}
]
================================================
FILE: test/tests/geoshapes/data/circle_halairport.json
================================================
{
"name": "hal airpork circular region",
"city": "bangalore",
"type": "geoshapes",
"description": "circle covering the hal airport",
"region": {
"type": "Circle",
"coordinates": [
77.6698637008667,
12.951865687866821
],
"radius": "2.4km"
}
}
================================================
FILE: test/tests/geoshapes/data/envelope_brockwell_park.json
================================================
{
"name": "brockwell park envelope",
"city": "london",
"type": "geoshapes",
"description": "brockwell park envelope",
"region": {
"type": "envelope",
"coordinates": [
[
-0.11278152465820314,
51.44579626059569
],
[
-0.10037899017333984,
51.45566490761856
]
]
}
}
================================================
FILE: test/tests/geoshapes/data/geometrycollection_tvm.json
================================================
{
"name": "geometrycollection comprised of various shapes",
"city": "bangalore",
"type": "geoshapes",
"description": "geometrycollection comprised of various shapes",
"region": {
"type": "geometrycollection",
"geometries": [
{
"type": "point",
"coordinates": [
76.92815780639648,
8.525851789596233
]
},
{
"type": "LineString",
"coordinates": [
[
76.92867279052734,
8.490369393806219
],
[
76.94377899169922,
8.494104537551882
]
]
},
{
"type": "polygon",
"coordinates": [
[
[
76.92815780639648,
8.525851789596233
],
[
76.92060470581055,
8.520504174874656
],
[
76.92206382751465,
8.519061154914393
],
[
76.92824363708496,
8.519061154914393
],
[
76.92970275878906,
8.523475081176768
],
[
76.92815780639648,
8.525851789596233
]
]
]
},
{
"type": "multipoint",
"coordinates": [
[
76.90670013427733,
8.497839644932787
],
[
76.94137573242188,
8.485275957394883
]
]
},
{
"type": "multiLineString",
"coordinates": [
[
[
76.89322471618651,
8.521522773921424
],
[
76.89648628234863,
8.518042549311815
]
],
[
[
76.9068717956543,
8.494783650690053
],
[
76.93296432495117,
8.468552033040881
]
]
]
},
{
"type": "multipolygon",
"coordinates": [
[
[
[
76.90249443054199,
8.546138091708775
],
[
76.89983367919922,
8.541300033890494
],
[
76.90498352050781,
8.53985709248573
],
[
76.90858840942383,
8.54520443620746
],
[
76.90712928771973,
8.548090273095957
],
[
76.90249443054199,
8.546138091708775
]
]
],
[
[
[
76.88326835632324,
8.564131732621458
],
[
76.88429832458496,
8.555729147617923
],
[
76.88893318176268,
8.552079482230221
],
[
76.89339637756348,
8.55369212938781
],
[
76.89494132995605,
8.56133089156368
],
[
76.89116477966309,
8.566423314514562
],
[
76.88326835632324,
8.564131732621458
]
]
]
]
}
]
}
}
================================================
FILE: test/tests/geoshapes/data/linestring_putney_bridge.json
================================================
{
"name": "linestring for putney bridge",
"city": "london",
"type": "geoshapes",
"description": "linestring for putney bridge",
"region": {
"type": "linestring",
"coordinates": [
[
-0.21183013916015625,
51.46791083061189
],
[
-0.21431922912597656,
51.465504685939706
]
]
}
}
================================================
FILE: test/tests/geoshapes/data/multilinestring_old_airport_road.json
================================================
{
"name": "road routes",
"city": "bangalore",
"type": "geoshapes",
"description": "multilinestrings approximating the roads indiranagar 100ft and old airport port road",
"region": {
"type": "multilinestring",
"coordinates": [
[
[
77.64081001281738,
12.983398626256326
],
[
77.64166831970213,
12.960648472679763
]
],
[ [
77.64192581176758,
12.960564828571133
],
[
77.66990661621094,
12.958390071883693
]
],
[ [
77.67016410827637,
12.958055492245812
],
[
77.68106460571289,
12.954626025039444
]
],
[ [
77.68149375915527,
12.954542378907867
],
[
77.7011489868164,
12.957219041184294
]
]
]
}
}
================================================
FILE: test/tests/geoshapes/data/multipoint_blr_stadiums.json
================================================
{
"name": "multipoints for stadiums",
"city": "bangalore",
"type": "geoshapes",
"description": "contains 3 points",
"region": {
"type": "multipoint",
"coordinates": [
[
77.5929594039917,
12.969347306502671
],
[
77.6004695892334,
12.979007674139009
],
[
77.60068416595459,
12.961735843534306
]
]
}
}
================================================
FILE: test/tests/geoshapes/data/multipolygon_london_parks.json
================================================
{
"name": "london parks as multipolygon",
"city": "london",
"type": "geoshapes",
"description": "multipolygon with london",
"region": {
"type": "MultiPolygon",
"coordinates": [
[
[
[
-0.163421630859375,
51.531600743186644
],
[
-0.15277862548828125,
51.52455221546295
],
[
-0.14556884765625,
51.524979430024345
],
[
-0.14591217041015625,
51.536085601784755
],
[
-0.15895843505859375,
51.53693981046689
],
[
-0.163421630859375,
51.531600743186644
]
]
],
[
[
[
-0.1902008056640625,
51.5091698216777
],
[
-0.1888275146484375,
51.50147667659363
],
[
-0.15071868896484375,
51.503186376638006
],
[
-0.1599884033203125,
51.51322956905176
],
[
-0.1902008056640625,
51.5091698216777
]
]
],
[
[
[
-0.16582489013671875,
51.4811690848672
],
[
-0.1635932922363281,
51.474861202507434
],
[
-0.14883041381835938,
51.47764105478667
],
[
-0.14951705932617188,
51.48352095330697
],
[
-0.16582489013671875,
51.4811690848672
]
]
]
]
}
}
================================================
FILE: test/tests/geoshapes/data/point_museum_of_london.json
================================================
{
"name": "geopoint for the museum of london",
"city": "london",
"type": "geoshapes",
"description": "geopoint for the museum of london",
"region": {
"type": "point",
"coordinates": [
-0.09613037109375,
51.51803669675129
]
}
}
================================================
FILE: test/tests/geoshapes/data/polygon_cubbonpark.json
================================================
{
"name": "cubbon park polygon",
"city": "bangalore",
"type": "geoshapes",
"description": "polygon inside cubbon park",
"region": {
"type": "Polygon",
"coordinates": [
[
[
77.58894681930542,
12.976498523818783
],
[
77.58677959442139,
12.974533005048169
],
[
77.5879168510437,
12.971333776381767
],
[
77.58849620819092,
12.96800904416803
],
[
77.59371042251587,
12.972128359891645
],
[
77.59512662887573,
12.973842978816679
],
[
77.59253025054932,
12.976853988320428
],
[
77.58894681930542,
12.976498523818783
]
]
]
}
}
================================================
FILE: test/tests/geoshapes/mapping.json
================================================
{
"types": {
"geoshapes": {
"properties": {
"name": {
"fields": [
{
"include_term_vectors": true,
"include_in_all": true,
"index": true,
"store": true,
"analyzer": "keyword",
"type": "text"
}
],
"dynamic": true,
"enabled": true
},
"region": {
"fields": [
{
"include_term_vectors": true,
"include_in_all": true,
"index": true,
"store": true,
"type": "geoshape"
}
],
"dynamic": true,
"enabled": true
}
}
}
},
"default_type": "geoshapes"
}
================================================
FILE: test/tests/geoshapes/searches.json
================================================
[
{
"comment": "search with a circular shape within cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "Circle",
"coordinates": [
77.59092092514038,
12.975494856600474
],
"radius": "0.1km"
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with a circular shape within cubbon park polygon, (circle doesn't fully contained within)",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
77.59092092514038,
12.975494856600474
],
"radius": "150m"
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "search with a polygon that contains the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "Polygon",
"coordinates": [
[
[
77.58617877960205,
12.9772303619447
],
[
77.58630752563477,
12.966419848296587
],
[
77.59802341461182,
12.968887279637073
],
[
77.5989246368408,
12.980304058548604
],
[
77.58617877960205,
12.9772303619447
]
]
]
},
"relation": "within"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with a multipolygon that intersects the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multipolygon",
"coordinates": [
[
[
[
77.58268117904663,
12.980513152175025
],
[
77.58147954940794,
12.977983107483992
],
[
77.58708000183104,
12.97886130773254
],
[
77.58268117904663,
12.980513152175025
]
]
],
[
[
[
77.5864577293396,
12.97762764459667
],
[
77.58879661560059,
12.975076660730531
],
[
77.59115695953369,
12.979216768855913
],
[
77.5864577293396,
12.97762764459667
]
]
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with multilinestrings that intersects the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multilinestring",
"coordinates": [
[
[
77.58761644363403,
12.974302996517075
],
[
77.59319543838501,
12.978401298465434
]
],
[
[
77.5947618484497,
12.98500862259466
],
[
77.59808778762817,
12.983565899088745
]
],
[
[
77.60109186172485,
12.973529329896703
],
[
77.59943962097168,
12.970225537247586
]
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with multilinestrings that aren't contained within the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multilinestring",
"coordinates": [
[
[
77.58761644363403,
12.974302996517075
],
[
77.59319543838501,
12.978401298465434
]
],
[
[
77.5947618484497,
12.98500862259466
],
[
77.59808778762817,
12.983565899088745
]
],
[
[
77.60109186172485,
12.973529329896703
],
[
77.59943962097168,
12.970225537247586
]
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "search with multilinestrings that are all contained within the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multilinestring",
"coordinates": [
[
[
77.59107112884521,
12.975243939162915
],
[
77.59190797805786,
12.973842978816679
]
],
[
[
77.58954763412476,
12.970685561638497
],
[
77.59117841720581,
12.971835618893842
]
],
[
[
77.58851766586304,
12.973152950670608
],
[
77.58937597274779,
12.972212000113458
]
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with point that is contained within the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "Point",
"coordinates": [
77.59107112884521,
12.975243939162915
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with an envelope that is within the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "Envelope",
"coordinates": [
[
77.59158611297607,
12.9720028995062035
],
[
77.59263753890991,
12.973173860642571
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with an envelope that contains the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "Envelope",
"coordinates": [
[
77.57969856262207,
12.9641614998626
],
[
77.60295867919922,
12.989336742847172
]
]
},
"relation": "within"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with a geometrycollection that is within the cubbon park polygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "geometrycollection",
"geometries": [
{
"type": "point",
"coordinates": [
77.59158611297607,
12.972002899506203
]
},
{
"type": "LineString",
"coordinates": [
[
77.58851766586304,
12.973152950670608
],
[
77.58937597274779,
12.972212000113458
]
]
},
{
"type": "polygon",
"coordinates": [
[
[
77.59055614471436,
12.974721193688106
],
[
77.58954763412476,
12.97350841995465
],
[
77.59141445159912,
12.973382960265356
],
[
77.59055614471436,
12.974721193688106
]
]
]
}
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with a polygon that intersects the hal airport region",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "polygon",
"coordinates": [
[
[
77.67934799194336,
12.938147195017896
],
[
77.66793251037598,
12.930492951786736
],
[
77.67711639404297,
12.922127390141315
],
[
77.67934799194336,
12.938147195017896
]
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "circle_halairport"
}
]
}
},
{
"comment": "search with a linestring that intersects the hal airport and cubbon park region",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "linestring",
"coordinates": [
[
77.59042739868164,
12.973529329896703
],
[
77.65892028808594,
12.950109093741462
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "circle_halairport"
},
{
"id": "polygon_cubbonpark"
}
]
}
},
{
"comment": "search with an envelope within the circle_halairport",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "envelope",
"coordinates": [
[
77.65625953674316,
12.943249893344905
],
[
77.68355369567871,
12.945843027882455
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "circle_halairport"
}
]
}
},
{
"comment": "search with a circle which intersects the road multilinestring and the hal circle",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
77.68132209777832,
12.954918786278716
],
"radius": "50m"
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 2,
"hits": [
{
"id": "circle_halairport"
},
{
"id": "multilinestring_old_airport_road"
}
]
}
},
{
"comment": "search with a polygon which intersects the road multilinestring",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "polygon",
"coordinates": [
[
[
77.64102458953856,
12.97751264178902
],
[
77.64109969139099,
12.975317123441693
],
[
77.64338493347168,
12.976728530319054
],
[
77.64102458953856,
12.97751264178902
]
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multilinestring_old_airport_road"
}
]
}
},
{
"comment": "search with a linestring which intersects the road multilinestring",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "linestring",
"coordinates": [
[
77.63969421386717,
12.978265386473618
],
[
77.64354586601257,
12.978453572288663
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multilinestring_old_airport_road"
}
]
}
},
{
"comment": "search with an envelope which intersects the road multilinestring",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "envelope",
"coordinates": [
[
77.64100313186644,
12.95902786307307
],
[
77.6419472694397,
12.96069029472353
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multilinestring_old_airport_road"
}
]
}
},
{
"comment": "search with multipoint which are contained within the multipolygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multipoint",
"coordinates": [
[
-0.14797210693359375,
51.52615424940099
],
[
-0.16857147216796875,
51.50863561745838
],
[
-0.15535354614257812,
51.48010001366223
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multipolygon_london_parks"
}
]
}
},
{
"comment": "search with multilinestring that are contained within the multipolygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multilinestring",
"coordinates": [
[
[
-0.17063140869140625,
51.50884929989774
],
[
-0.15655517578125,
51.5072466571743
]
],
[
[
-0.16222000122070312,
51.47988619641402
],
[
-0.15466690063476562,
51.48074145939243
]
],
[
[
-0.15844345092773438,
51.53245503603458
],
[
-0.15123367309570312,
51.53170753066937
]
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multipolygon_london_parks"
}
]
}
},
{
"comment": "search with multilinestring out of which one isn't contained within the multipolygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "multilinestring",
"coordinates": [
[
[
-0.17063140869140625,
51.50884929989774
],
[
-0.15655517578125,
51.5072466571743
]
],
[
[
-0.16222000122070312,
51.47988619641402
],
[
-0.15466690063476562,
51.48074145939243
]
],
[
[
-0.15844345092773438,
51.53245503603458
],
[
-0.15123367309570312,
51.53170753066937
]
],
[
[
-0.08651733398437499,
51.51013137348817
],
[
-0.08909225463867188,
51.50543026060529
]
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "search with a geometrycollection that contains the london_parks_multipolygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "geometrycollection",
"geometries": [
{
"type": "multipolygon",
"coordinates": [
[
[
[
-0.19517898559570312,
51.51344322994464
],
[
-0.19277572631835938,
51.49292721420451
],
[
-0.14110565185546875,
51.49773648412071
],
[
-0.14471054077148438,
51.51889124411907
],
[
-0.19517898559570312,
51.51344322994464
]
]
],
[
[
[
-0.16925811767578122,
51.48373475351443
],
[
-0.16925811767578122,
51.47004951935931
],
[
-0.14608383178710938,
51.472722739318336
],
[
-0.14453887939453125,
51.48758298584306
],
[
-0.16925811767578122,
51.48373475351443
]
]
]
]
},
{
"type": "LineString",
"coordinates": [
[
77.58851766586304,
12.973152950670608
],
[
77.58937597274779,
12.972212000113458
]
]
},
{
"type": "polygon",
"coordinates": [
[
[
-0.17337799072265625,
51.54323910441573
],
[
-0.1668548583984375,
51.51889124411907
],
[
-0.09286880493164062,
51.53341609632549
],
[
-0.17337799072265625,
51.54323910441573
]
]
]
}
]
},
"relation": "within"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multipolygon_london_parks"
}
]
}
},
{
"comment": "search with a circle that intersects with one of the polygons in the multipolygon_london_parks",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
-0.14265060424804688,
51.53298896092339
],
"radius": "550m"
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multipolygon_london_parks"
}
]
}
},
{
"comment": "search with a circle that contains london museum geopoint",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
-0.09115219116210938,
51.516487788780005
],
"radius": "1050m"
},
"relation": "within"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "point_museum_of_london"
}
]
}
},
{
"comment": "search with brockwell park polygon that is contained within brockwell park envelope",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "polygon",
"coordinates": [
[
[
-0.11149406433105469,
51.454942883825744
],
[
-0.11230945587158205,
51.45218839188088
],
[
-0.11136531829833984,
51.450530268053605
],
[
-0.1117086410522461,
51.44873835686053
],
[
-0.11016368865966797,
51.446010237625224
],
[
-0.10497093200683594,
51.446705656046376
],
[
-0.10192394256591797,
51.4490058107573
],
[
-0.1007223129272461,
51.45085119994589
],
[
-0.10188102722167967,
51.45218839188088
],
[
-0.10681629180908203,
51.45368600035086
],
[
-0.10715961456298828,
51.453338345620416
],
[
-0.11149406433105469,
51.454942883825744
]
]
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "envelope_brockwell_park"
}
]
}
},
{
"comment": "search with point that is contained within brockwell park envelope",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "point",
"coordinates": [
-0.10074377059936523,
51.450824455707696
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "envelope_brockwell_park"
}
]
}
},
{
"comment": "search with linestring that intersects the putney bridge",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "LineString",
"coordinates": [
[
-0.2171945571899414,
51.46876631814087
],
[
-0.2064228057861328,
51.464943233925986
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "linestring_putney_bridge"
}
]
}
},
{
"comment": "search with polygon that contains the blr stadiums/multipoint",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "polygon",
"coordinates": [
[
[
77.60107040405273,
12.981349524921757
],
[
77.59270191192627,
12.969180024104505
],
[
77.60089874267577,
12.961024870820744
],
[
77.60107040405273,
12.981349524921757
]
]
]
},
"relation": "within"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "multipoint_blr_stadiums"
}
]
}
},
{
"comment": "search a point that is within the multipolygon of the geometrycollection",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "point",
"coordinates": [
76.88919067382812,
8.556238400473156
]
},
"relation": "contains"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "geometrycollection_tvm"
}
]
}
},
{
"comment": "search an envelope that intersects with the polygon of the geometrycollection",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "envelope",
"coordinates": [
[
76.91880226135254,
8.515665792358828
],
[
76.92523956298828,
8.525427378462332
]
]
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "geometrycollection_tvm"
}
]
}
},
{
"comment": "search a circle that intersects with the linestring of the geometrycollection",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
76.91305160522461,
8.477890354619287
],
"radius": "1mi"
},
"relation": "intersects"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "geometrycollection_tvm"
}
]
}
},
{
"comment": "search a circle that contains the entire geometrycollection",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
76.93622589111328,
8.501574715933401
],
"radius": "10mi"
},
"relation": "within"
}
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "geometrycollection_tvm"
}
]
}
},
{
"comment": "search a polygon that contains the entire geometrycollection, circle, multilinestring, polygon, multipoint",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
77.71728515624999,
12.060809058367294
],
"radius": "1000mi"
},
"relation": "within"
}
},
"sort": ["-_id"]
},
"result": {
"total_hits": 5,
"hits": [
{
"id": "polygon_cubbonpark"
},
{
"id": "multipoint_blr_stadiums"
},
{
"id": "multilinestring_old_airport_road"
},
{
"id": "geometrycollection_tvm"
},
{
"id": "circle_halairport"
}
]
}
},
{
"comment": "search circle that contains the envelope, linestring, point, multipolygon",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "circle",
"coordinates": [
-0.23277282714843747,
51.45828549061808
],
"radius": "1000mi"
},
"relation": "within"
}
},
"sort": ["-_id"]
},
"result": {
"total_hits": 4,
"hits": [
{
"id": "point_museum_of_london"
},
{
"id": "multipolygon_london_parks"
},
{
"id": "linestring_putney_bridge"
},
{
"id": "envelope_brockwell_park"
}
]
}
},
{
"comment": "search a polygon(almost the whole earth surface) that contains every indexed shape",
"search": {
"from": 0,
"size": 10,
"query": {
"geometry": {
"shape": {
"type": "polygon",
"coordinates": [
[
[
-135.0, -38.0
],
[
149.0, -38.0
],
[
149.0, 77.0
],
[
-135.0, 77.0
]
]
]
},
"relation": "within"
}
},
"sort": ["-_id"]
},
"result": {
"total_hits": 9,
"hits": [
{
"id": "polygon_cubbonpark"
},
{
"id": "point_museum_of_london"
},
{
"id": "multipolygon_london_parks"
},
{
"id": "multipoint_blr_stadiums"
},
{
"id": "multilinestring_old_airport_road"
},
{
"id": "linestring_putney_bridge"
},
{
"id": "geometrycollection_tvm"
},
{
"id": "envelope_brockwell_park"
},
{
"id": "circle_halairport"
}
]
}
}
]
================================================
FILE: test/tests/phrase/data/a.json
================================================
{
"body": "Twenty Thousand Leagues Under The Sea"
}
================================================
FILE: test/tests/phrase/data/b.json
================================================
{
"body": ["bad call", "defenseless receiver"]
}
================================================
FILE: test/tests/phrase/mapping.json
================================================
{
"types": {
"book": {
"properties": {
"body": {
"fields": [
{
"include_term_vectors": true,
"include_in_all": true,
"index": true,
"store": true,
"analyzer": "en",
"type": "text"
}
],
"dynamic": true,
"enabled": true
}
}
}
},
"default_type": "book"
}
================================================
FILE: test/tests/phrase/searches.json
================================================
[
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Twenty"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Twenty Thousand"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Twenty Thousand Leagues"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Twenty Thousand Leagues Under"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Twenty Thousand Leagues Under the"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Twenty Thousand Leagues Under the Sea"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Thousand"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Thousand Leagues"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Thousand Leagues Under"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Thousand Leagues Under the"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Thousand Leagues Under the Sea"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Leagues"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Leagues Under"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Leagues Under the"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Leagues Under the Sea"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Under the Sea"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "the Sea"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "Sea"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "bad call"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "defenseless receiver"
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "b"
}
]
}
},
{
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"match_phrase": "bad receiver"
}
},
"result": {
"total_hits": 0,
"hits": []
}
},
{
"comment": "multi-phrase terms",
"search": {
"from": 0,
"size": 10,
"sort": ["-_score", "_id"],
"query": {
"field": "body",
"terms": [["twenti","thirti"],["thousand"]]
}
},
"result": {
"total_hits": 1,
"hits": [
{
"id": "a"
}
]
}
}
]
================================================
FILE: test/tests/sort/data/a.json
================================================
{
"id": "a",
"name": "marty",
"age": 19,
"born": "2014-11-25",
"title": "mista",
"tags": ["gopher", "belieber"]
}
================================================
FILE: test/tests/sort/data/b.json
================================================
{
"id": "b",
"name": "steve",
"age": 21,
"born": "2000-09-11",
"title": "zebra",
"tags": ["thought-leader", "futurist"]
}
================================================
FILE: test/tests/sort/data/c.json
================================================
{
"id": "c",
"name": "aster",
"age": 21,
"born": "1954-02-02",
"title": "blogger",
"tags": ["red", "blue", "green"]
}
================================================
FILE: test/tests/sort/data/d.json
================================================
{
"id": "d",
"age": 65,
"born": "1978-12-02",
"title": "agent d is desperately trying out to be successful rapster!",
"tags": ["cats"]
}
================================================
FILE: test/tests/sort/data/e.json
================================================
{
"id": "e",
"name": "nancy",
"born": "1954-10-22",
"title": "rapstar nancy rapster",
"tags": ["pain"]
}
================================================
FILE: test/tests/sort/data/f.json
================================================
{
"id": "f",
"name": "frank",
"age": 1,
"title": "frank the taxman of cb, Rapster!",
"tags": ["vitamin","purple"]
}
================================================
FILE: test/tests/sort/mapping.json
================================================
{
}
================================================
FILE: test/tests/sort/searches.json
================================================
[
{
"comment": "sort by name, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["name"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "c"
},
{
"id": "f"
},
{
"id": "a"
},
{
"id": "e"
},
{
"id": "b"
},
{
"id": "d"
}
]
}
},
{
"comment": "sort by name, descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-name"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "b"
},
{
"id": "e"
},
{
"id": "a"
},
{
"id": "f"
},
{
"id": "c"
},
{
"id": "d"
}
]
}
},
{
"comment": "sort by name, descending, missing first",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"name","missing":"first","desc":true}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "d"
},
{
"id": "b"
},
{
"id": "e"
},
{
"id": "a"
},
{
"id": "f"
},
{
"id": "c"
}
]
}
},
{
"comment": "sort by age, ascending, _id, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["age", "_id"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "a"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "d"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by age, descending, _id, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-age", "_id"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "d"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "a"
},
{
"id": "f"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by age, descending, missing first, id, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"age","missing":"first","desc":true},{"by":"id","desc":false}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "e"
},
{
"id": "d"
},
{
"id": "b"
},
{
"id": "c"
},
{
"id": "a"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by born, ascending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["born"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "c"
},
{
"id": "e"
},
{
"id": "d"
},
{
"id": "b"
},
{
"id": "a"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by born, descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-born"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "a"
},
{
"id": "b"
},
{
"id": "d"
},
{
"id": "e"
},
{
"id": "c"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by born, descending, missing first",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"born","missing":"first","desc":true}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "a"
},
{
"id": "b"
},
{
"id": "d"
},
{
"id": "e"
},
{
"id": "c"
}
]
}
},
{
"comment": "sort on multi-valued field",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": [{"by":"field","field":"tags","mode":"min"}]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "a"
},
{
"id": "c"
},
{
"id": "d"
},
{
"id": "b"
},
{
"id": "e"
},
{
"id": "f"
}
]
}
},
{
"comment": "multi-column sort by age, ascending, name, ascending (flips b and c which have same age)",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["age", "name"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "a"
},
{
"id": "c"
},
{
"id": "b"
},
{
"id": "d"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by docid descending",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["-_id"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "f"
},
{
"id": "e"
},
{
"id": "d"
},
{
"id": "c"
},
{
"id": "b"
},
{
"id": "a"
}
]
}
},
{
"comment": "sort by name, ascending, after marty",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["name"],
"search_after": ["marty"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "e"
},
{
"id": "b"
},
{
"id": "d"
}
]
}
},
{
"comment": "sort by name, ascending, before nancy",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["name"],
"search_before": ["nancy"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "c"
},
{
"id": "f"
},
{
"id": "a"
}
]
}
},
{
"comment": "sort by ID, after doc d",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["_id"],
"search_after": ["d"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "e"
},
{
"id": "f"
}
]
}
},
{
"comment": "sort by ID, before doc d",
"search": {
"from": 0,
"size": 10,
"query": {
"match_all":{}
},
"sort": ["_id"],
"search_before": ["d"]
},
"result": {
"total_hits": 6,
"hits": [
{
"id": "a"
},
{
"id": "b"
},
{
"id": "c"
}
]
}
},
{
"comment": "sort by score, after score 0.286889[ e(299646) > f(286889) > d(222224)]",
"search": {
"from": 0,
"size": 10,
"query": {
"query":"rapster"
},
"sort": ["_score"],
"search_after": ["0.286889"]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "f"
},
{
"id": "e"
}
]
}
},
{
"comment": "sort by score, before score f/0.286889[ e(299646) > f(286889) > d(222224)]",
"search": {
"from": 0,
"size": 10,
"query": {
"query":"rapster"
},
"sort": ["_score"],
"search_before": ["0.286889"]
},
"result": {
"total_hits": 3,
"hits": [
{
"id": "d"
}
]
}
}
]
================================================
FILE: test/versus_score_test.go
================================================
// Copyright (c) 2018 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"os"
"strconv"
"testing"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/document"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
index "github.com/blevesearch/bleve_index_api"
)
// TestDisjunctionSearchScoreIndexWithCompositeFields runs the same disjunction
// query against an upsidedown index and a scorch index and checks that both
// return the same two documents, in the same order, with the same scores.
func TestDisjunctionSearchScoreIndexWithCompositeFields(t *testing.T) {
	upHits := disjunctionQueryiOnIndexWithCompositeFields(upsidedown.Name, t)
	scHits := disjunctionQueryiOnIndexWithCompositeFields(scorch.Name, t)

	// Both comparisons below index hits 0 and 1 directly, so stop here with a
	// readable failure rather than an index-out-of-range panic when either
	// index returned fewer than two hits.
	if len(upHits) < 2 || len(scHits) < 2 {
		t.Fatalf("expected at least 2 hits from each index; upsidedown: %d, scorch: %d",
			len(upHits), len(scHits))
	}

	if upHits[0].ID != scHits[0].ID || upHits[1].ID != scHits[1].ID {
		t.Errorf("upsidedown, scorch returned different docs;\n"+
			"upsidedown: (%s, %s), scorch: (%s, %s)\n",
			upHits[0].ID, upHits[1].ID, scHits[0].ID, scHits[1].ID)
	}

	// On a score mismatch, print the score explanations of every hit so the
	// differing component can be spotted.
	if scHits[0].Score != upHits[0].Score || scHits[1].Score != upHits[1].Score {
		t.Errorf("upsidedown, scorch showing different scores;\n"+
			"upsidedown: (%+v, %+v), scorch: (%+v, %+v)\n",
			*upHits[0].Expl, *upHits[1].Expl, *scHits[0].Expl, *scHits[1].Expl)
	}
}
// disjunctionQueryiOnIndexWithCompositeFields creates a temporary index of the
// given index type, indexes two documents ("one"/1 and "two"/2) that each
// carry an "_all" composite field, runs a boosted disjunction query and
// returns the hits of that search.
//
// The steps depend on each other, so any failure stops the calling test right
// away: carrying on after a failed index creation or search would only end in
// a nil-pointer panic that hides the real error. The temporary directory is
// removed and the index closed before returning.
func disjunctionQueryiOnIndexWithCompositeFields(indexName string,
	t *testing.T,
) []*search.DocumentMatch {
	tmpIndexPath, err := os.MkdirTemp("", "bleve-testidx")
	if err != nil {
		t.Fatalf("error creating temp dir: %v", err)
	}
	defer func() {
		// Errorf rather than Fatalf: this may run while the test is already
		// being aborted, and the cleanup failure only needs to be recorded.
		if err := os.RemoveAll(tmpIndexPath); err != nil {
			t.Errorf("error removing temp dir: %v", err)
		}
	}()

	// create an index
	idxMapping := mapping.NewIndexMapping()
	idx, err := bleve.NewUsing(tmpIndexPath, idxMapping, indexName,
		bleve.Config.DefaultKVStore, nil)
	if err != nil {
		// idx is unusable from here on, so there is nothing left to test.
		t.Fatal(err)
	}
	defer func() {
		if err := idx.Close(); err != nil {
			t.Error(err)
		}
	}()

	// create and insert documents as a batch
	batch := idx.NewBatch()
	docs := []struct {
		field1 string
		field2 int
	}{
		{
			field1: "one",
			field2: 1,
		},
		{
			field1: "two",
			field2: 2,
		},
	}

	for _, d := range docs {
		// The document id is the decimal form of field2.
		doc := document.NewDocument(strconv.Itoa(d.field2))
		doc.Fields = []document.Field{
			document.NewTextField("field1", []uint64{}, []byte(d.field1)),
			document.NewNumericField("field2", []uint64{}, float64(d.field2)),
		}
		doc.CompositeFields = []*document.CompositeField{
			document.NewCompositeFieldWithIndexingOptions(
				"_all", true, []string{"field1"}, []string{},
				index.IndexField|index.IncludeTermVectors),
		}
		if err = batch.IndexAdvanced(doc); err != nil {
			t.Fatal(err)
		}
	}

	if err = idx.Batch(batch); err != nil {
		t.Fatal(err)
	}

	/*
		Query:
		        DISJ
		       /    \
		    CONJ    TERM(two)
		    /
		TERM(one)
	*/

	tq1 := bleve.NewTermQuery("one")
	tq1.SetBoost(2)
	tq2 := bleve.NewTermQuery("two")
	tq2.SetBoost(3)

	// NOTE(review): cq is built and boosted but the disjunction below is made
	// of tq1 and tq2, so the CONJ node of the diagram above is not part of the
	// executed query. Left as is because switching to cq would change what
	// the test measures -- confirm the intent.
	cq := bleve.NewConjunctionQuery(tq1)
	cq.SetBoost(4)

	q := bleve.NewDisjunctionQuery(tq1, tq2)
	sr := bleve.NewSearchRequestOptions(q, 2, 0, true)
	res, err := idx.Search(sr)
	if err != nil {
		// res cannot be relied on after a failed search.
		t.Fatal(err)
	}

	// The caller indexes hits 0 and 1, so anything but two hits is fatal.
	if len(res.Hits) != 2 {
		t.Fatalf("indexType: %s Expected 2 hits, but got: %v", indexName, len(res.Hits))
	}

	return res.Hits
}
================================================
FILE: test/versus_test.go
================================================
// Copyright (c) 2014 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package test
import (
"bytes"
"encoding/json"
"fmt"
"math"
"math/rand"
"os"
"reflect"
"strconv"
"strings"
"testing"
"text/template"
"time"
"github.com/blevesearch/bleve/v2"
"github.com/blevesearch/bleve/v2/index/scorch"
"github.com/blevesearch/bleve/v2/index/upsidedown"
"github.com/blevesearch/bleve/v2/index/upsidedown/store/boltdb"
"github.com/blevesearch/bleve/v2/mapping"
"github.com/blevesearch/bleve/v2/search"
)
// Tests scorch indexer versus upsidedown/bolt indexer against various
// templated queries. Example usage from the bleve top-level directory...
//
// go test -v -run TestScorchVersusUpsideDownBolt ./test
// VERBOSE=1 FOCUS=Trista go test -v -run TestScorchVersusUpsideDownBolt ./test
//
func init() {
// override for tests: set the scorch package's DefaultPersisterNapTimeMSec
// variable to 1 for everything that runs in this test package.
scorch.DefaultPersisterNapTimeMSec = 1
}
// TestScorchVersusUpsideDownBoltAll compares a scorch index with an upsidedown
// index over the default search templates (no Focus set here), using 1000
// generated docs and 100 attempts per template.
func TestScorchVersusUpsideDownBoltAll(t *testing.T) {
	vt := &VersusTest{t: t}
	vt.NumDocs = 1000
	vt.MaxWordsPerDoc = 20
	vt.NumWords = 10
	vt.BatchSize = 1000
	vt.NumAttemptsPerSearch = 100

	// nil, nil: use the default comparison callback and default templates.
	vt.run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
}
// TestScorchVersusUpsideDownBoltSmallMNSAM is a small scorch-versus-upsidedown
// run (5 docs, a one-word dictionary, one attempt) focused on the search
// templates that contain "must-not-same-as-must".
func TestScorchVersusUpsideDownBoltSmallMNSAM(t *testing.T) {
	vt := &VersusTest{t: t}
	vt.Focus = "must-not-same-as-must"
	vt.NumDocs = 5
	vt.MaxWordsPerDoc = 2
	vt.NumWords = 1
	vt.BatchSize = 1
	vt.NumAttemptsPerSearch = 1

	// nil, nil: use the default comparison callback and default templates.
	vt.run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
}
// TestScorchVersusUpsideDownBoltSmallCMP11 is a small scorch-versus-upsidedown
// run (30 docs, a two-word dictionary, one attempt) focused on the search
// templates that contain "conjuncts-match-phrase-1-1".
func TestScorchVersusUpsideDownBoltSmallCMP11(t *testing.T) {
	vt := &VersusTest{t: t}
	vt.Focus = "conjuncts-match-phrase-1-1"
	vt.NumDocs = 30
	vt.MaxWordsPerDoc = 8
	vt.NumWords = 2
	vt.BatchSize = 1
	vt.NumAttemptsPerSearch = 1

	// nil, nil: use the default comparison callback and default templates.
	vt.run(scorch.Name, boltdb.Name, upsidedown.Name, boltdb.Name, nil, nil)
}
// -------------------------------------------------------
// Templates used to compare search results in the "versus" tests.
//
// Each entry is a text/template source that testVersusSearches renders and
// then unmarshals as JSON into a bleve.SearchRequest. The template functions
// {{word}} and {{bodyWord i}} are supplied by testVersusSearches. A non-empty
// VersusTest.Focus (or FOCUS environment variable) is matched as a substring
// against the whole template text, which is how the "about" strings below can
// be used to select individual templates.
var testVersusSearchTemplates = []string{
`{
"about": "expected to return zero hits",
"query": {
"query": "title:notARealTitle"
}
}`,
`{
"about": "try straight word()'s",
"query": {
"query": "body:{{word}}"
}
}`,
`{
"about": "conjuncts on same term",
"query": {
"conjuncts": [
{ "field": "body", "term": "{{word}}", "boost": 1.0 },
{ "field": "body", "term": "{{word}}", "boost": 1.0 }
]
}
}`,
`{
"about": "disjuncts on same term",
"query": {
"disjuncts": [
{ "field": "body", "term": "{{word}}", "boost": 1.0 },
{ "field": "body", "term": "{{word}}", "boost": 1.0 }
]
}
}`,
`{
"about": "never-matching-title-conjuncts",
"query": {
"conjuncts": [
{"field": "body", "match": "{{word}}"},
{"field": "body", "match": "{{word}}"},
{"field": "title", "match": "notAnActualTitle"}
]
}
}`,
`{
"about": "never-matching-title-disjuncts",
"query": {
"disjuncts": [
{"field": "body", "match": "{{word}}"},
{"field": "body", "match": "{{word}}"},
{"field": "title", "match": "notAnActualTitle"}
]
}
}`,
`{
"about": "must-not-never-matches",
"query": {
"must_not": {"disjuncts": [
{"field": "title", "match": "notAnActualTitle"}
]},
"should": {"disjuncts": [
{"field": "body", "match": "{{word}}"}
]}
}
}`,
`{
"about": "must-not-only",
"query": {
"must_not": {"disjuncts": [
{"field": "body", "term": "{{word}}"}
]}
}
}`,
`{
"about": "must-not-same-as-must -- see: MB-27291",
"query": {
"must_not": {"disjuncts": [
{"field": "body", "match": "{{word}}"}
]},
"must": {"conjuncts": [
{"field": "body", "match": "{{word}}"}
]}
}
}`,
`{
"about": "must-not-same-as-should",
"query": {
"must_not": {"disjuncts": [
{"field": "body", "match": "{{word}}"}
]},
"should": {"disjuncts": [
{"field": "body", "match": "{{word}}"}
]}
}
}`,
`{
"about": "inspired by testrunner RQG issue -- see: MB-27291",
"query": {
"must_not": {"disjuncts": [
{"field": "title", "match": "Trista Allen"},
{"field": "body", "match": "{{word}}"}
]},
"should": {"disjuncts": [
{"field": "title", "match": "Kallie Safiya Amara"},
{"field": "body", "match": "{{word}}"}
]}
}
}`,
`{
"about": "conjuncts-match-phrase-1-1 inspired by testrunner RQG issue -- see: MB-27291",
"query": {
"conjuncts": [
{"field": "body", "match": "{{bodyWord 0}}"},
{"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 1}}"}
]
}
}`,
`{
"about": "conjuncts-match-phrase-1-2 inspired by testrunner RQG issue -- see: MB-27291 -- FAILS!!",
"query": {
"conjuncts": [
{"field": "body", "match": "{{bodyWord 0}}"},
{"field": "body", "match_phrase": "{{bodyWord 1}} {{bodyWord 2}}"}
]
}
}`,
}
// -------------------------------------------------------
// VersusTest holds the configuration and the running state of one "versus"
// test: the same generated docs are inserted into two indexes and the results
// of rendered search templates are compared between them.
type VersusTest struct {
t *testing.T // Test on which failures are reported.
// Use environment variable VERBOSE= that's > 0 for more
// verbose output.
Verbose int
// Allow user to focus on particular search templates, where
// the search template must contain the Focus string.
Focus string
NumDocs int // Number of docs to insert.
MaxWordsPerDoc int // Max number words in each doc's Body field.
NumWords int // Total number of words in the dictionary.
BatchSize int // Batch size when inserting docs.
NumAttemptsPerSearch int // For each search template, number of searches to try.
// The Bodies is an array with length NumDocs, where each entry
// is the words in a doc's Body field.
Bodies [][]string
// CurAttempt is set by testVersusSearches to the attempt number of the
// search being rendered; the template functions derive their words from it.
CurAttempt int
// TotAttempts is incremented by testVersusSearches once per search attempt
// whose results were compared.
TotAttempts int
}
// -------------------------------------------------------
// testVersusSearches renders each search template vt.NumAttemptsPerSearch
// times, runs every rendered request against both idxA and idxB, and reports
// any difference between the two results on vt.t.
//
// Templates may call {{word}} and {{bodyWord i}}; both are driven by
// vt.CurAttempt, which is set to the attempt number before each render. When
// vt.Focus is empty it is filled from the FOCUS environment variable, and a
// non-empty Focus skips every template that does not contain it.
//
// A hit that only one index returned, or a search that produced no result,
// is reported as a test error instead of causing a nil-pointer panic.
func testVersusSearches(vt *VersusTest, searchTemplates []string, idxA, idxB bleve.Index) {
	t := vt.t

	funcMap := template.FuncMap{
		// Returns a word. The word may or may not be in any
		// document's body.
		"word": func() string {
			return vt.genWord(vt.CurAttempt % vt.NumWords)
		},
		// Picks a document and returns the i'th word in that
		// document's body (wrapping around, "" for an empty body).
		// You can use this in searches to definitely find at least
		// one document.
		"bodyWord": func(i int) string {
			body := vt.Bodies[vt.CurAttempt%len(vt.Bodies)]
			if len(body) == 0 {
				return ""
			}
			return body[i%len(body)]
		},
	}

	// Optionally allow call to focus on a particular search templates,
	// where the search template must contain the vt.Focus string.
	if vt.Focus == "" {
		vt.Focus = os.Getenv("FOCUS")
	}

	for i, searchTemplate := range searchTemplates {
		if vt.Focus != "" && !strings.Contains(searchTemplate, vt.Focus) {
			continue
		}

		tmpl, err := template.New("search").Funcs(funcMap).Parse(searchTemplate)
		if err != nil {
			t.Fatalf("could not parse search template: %s, err: %v", searchTemplate, err)
		}

		for j := 0; j < vt.NumAttemptsPerSearch; j++ {
			vt.CurAttempt = j

			var buf bytes.Buffer
			err = tmpl.Execute(&buf, vt)
			if err != nil {
				t.Fatalf("could not execute search template: %s, err: %v", searchTemplate, err)
			}

			bufBytes := buf.Bytes()

			if vt.Verbose > 0 {
				fmt.Printf(" %s\n", bufBytes)
			}

			var req bleve.SearchRequest
			err = json.Unmarshal(bufBytes, &req)
			if err != nil {
				t.Fatalf("could not unmarshal search: %s, err: %v", bufBytes, err)
			}

			req.Size = vt.NumDocs * 10 // Crank up limit to get all results.

			// Each index gets its own copy of the request struct.
			searchA := req
			searchB := req

			resA, errA := idxA.Search(&searchA)
			resB, errB := idxB.Search(&searchB)
			if errA != errB {
				t.Errorf("search: (%d) %s,\n err mismatch, errA: %v, errB: %v",
					i, bufBytes, errA, errB)
			}

			// Without both results there is nothing to compare; dereferencing
			// a nil result below would panic.
			if resA == nil || resB == nil {
				if errA == errB {
					t.Errorf("search: (%d) %s,\n missing result, errA: %v, errB: %v",
						i, bufBytes, errA, errB)
				}
				vt.TotAttempts++
				continue
			}

			// Scores might have float64 vs float32 wobbles, so truncate precision.
			resA.MaxScore = math.Trunc(resA.MaxScore*1000.0) / 1000.0
			resB.MaxScore = math.Trunc(resB.MaxScore*1000.0) / 1000.0

			// Timings may be different between A & B, so force equality.
			resA.Took = resB.Took

			// Hits might have different ordering since some indexers
			// (like upsidedown) have a natural secondary sort on id
			// while others (like scorch) don't. So, we compare by
			// putting the hits from A & B into maps.
			hitsA := hitsById(resA)
			hitsB := hitsById(resB)

			// Compare in both directions so a hit present in only one of
			// the maps is reported whichever map it is in.
			for id, hitA := range hitsA {
				versusCompareHit(vt, "hitsA", id, hitA, hitsB[id])
			}
			for id, hitB := range hitsB {
				versusCompareHit(vt, "hitsB", id, hitsA[id], hitB)
			}

			if !reflect.DeepEqual(hitsA, hitsB) {
				t.Errorf("=========\nsearch: (%d) %s,\n res hits mismatch,\n len(hitsA): %d,\n len(hitsB): %d",
					i, bufBytes, len(hitsA), len(hitsB))
				t.Errorf("\n hitsA: %#v,\n hitsB: %#v",
					hitsA, hitsB)
			}

			// The hits were compared above and Cost is zeroed on both
			// sides; clear them so only the rest of the result is compared.
			resA.Hits = nil
			resB.Hits = nil
			resA.Cost = 0
			resB.Cost = 0

			if !reflect.DeepEqual(resA, resB) {
				resAj, _ := json.Marshal(resA)
				resBj, _ := json.Marshal(resB)
				t.Errorf("search: (%d) %s,\n res mismatch,\n resA: %s,\n resB: %s",
					i, bufBytes, resAj, resBj)
			}

			if vt.Verbose > 0 {
				fmt.Printf(" Total: (%t) %d\n", resA.Total == resB.Total, resA.Total)
			}

			vt.TotAttempts++
		}
	}
}

// versusCompareHit reports on vt.t when the hits that the two indexes returned
// for document id differ. Either hit may be nil, meaning that index did not
// return the document. drivingFrom names the map being iterated and only
// appears in the failure message.
func versusCompareHit(vt *VersusTest, drivingFrom, id string, hitA, hitB *search.DocumentMatch) {
	// An empty and a nil FieldTermLocations are treated as the same thing.
	if hitA != nil && len(hitA.FieldTermLocations) == 0 {
		hitA.FieldTermLocations = nil
	}
	if hitB != nil && len(hitB.FieldTermLocations) == 0 {
		hitB.FieldTermLocations = nil
	}
	if reflect.DeepEqual(hitA, hitB) {
		return
	}

	vt.t.Errorf("\n driving from %s\n hitA: %#v,\n hitB: %#v", drivingFrom, hitA, hitB)

	// Doc ids are the decimal position of the doc in vt.Bodies (see
	// insertBodies); print the body when the id maps back to one.
	idx, err := strconv.Atoi(id)
	if err != nil || idx < 0 || idx >= len(vt.Bodies) {
		return
	}
	vt.t.Errorf("\n doc: %d, body: %s", idx, strings.Join(vt.Bodies[idx], " "))
}
// hitsById returns the hits of res keyed by document id. Each hit is modified
// in place first: the fields that may legitimately differ between indexer
// implementations are cleared (Index, IndexInternalID, HitNumber) and the
// score is truncated to three decimal places.
func hitsById(res *bleve.SearchResult) map[string]*search.DocumentMatch {
	byID := make(map[string]*search.DocumentMatch, len(res.Hits))
	for i := range res.Hits {
		hit := res.Hits[i]
		hit.Score = math.Trunc(hit.Score*1000.0) / 1000.0
		hit.Index, hit.IndexInternalID, hit.HitNumber = "", nil, 0
		byID[hit.ID] = hit
	}
	return byID
}
// -------------------------------------------------------
// run creates two fresh indexes (A and B) of the given index types and KV
// stores, inserts the same generated bodies into both and hands them to cb
// together with searchTemplates.
//
// A nil cb defaults to testVersusSearches and nil searchTemplates default to
// testVersusSearchTemplates. When vt.Verbose is not positive it is read from
// the VERBOSE environment variable, and vt.Bodies is generated only when it
// is still nil. Both indexes are closed before run returns.
func (vt *VersusTest) run(indexTypeA, kvStoreA, indexTypeB, kvStoreB string,
	cb func(versusTest *VersusTest, searchTemplates []string, idxA, idxB bleve.Index),
	searchTemplates []string,
) {
	if cb == nil {
		cb = testVersusSearches
	}
	if searchTemplates == nil {
		searchTemplates = testVersusSearchTemplates
	}

	if vt.Verbose <= 0 {
		// A missing or malformed VERBOSE leaves Verbose at 0, which is what
		// Atoi returns alongside its error.
		vt.Verbose, _ = strconv.Atoi(os.Getenv("VERBOSE"))
	}

	// Each index lives in its own unique, per-test directory instead of a
	// fixed path under /tmp, so concurrent runs cannot trample each other
	// and nothing depends on /tmp existing. The testing package removes the
	// directories once the test has finished.
	dirA := vt.t.TempDir()
	dirB := vt.t.TempDir()

	imA := vt.makeIndexMapping()
	imB := vt.makeIndexMapping()

	kvConfigA := map[string]interface{}{}
	kvConfigB := map[string]interface{}{}

	idxA, err := bleve.NewUsing(dirA, imA, indexTypeA, kvStoreA, kvConfigA)
	if err != nil || idxA == nil {
		vt.t.Fatalf("new using err: %v", err)
	}
	// Close errors are deliberately ignored: the comparison is already done.
	defer func() { _ = idxA.Close() }()

	idxB, err := bleve.NewUsing(dirB, imB, indexTypeB, kvStoreB, kvConfigB)
	if err != nil || idxB == nil {
		vt.t.Fatalf("new using err: %v", err)
	}
	defer func() { _ = idxB.Close() }()

	if vt.Bodies == nil {
		vt.Bodies = vt.genBodies()
	}

	vt.insertBodies(idxA)
	vt.insertBodies(idxB)

	cb(vt, searchTemplates, idxA, idxB)
}
// -------------------------------------------------------
// makeIndexMapping returns a new index mapping whose default document mapping
// maps "title" and "body" with one shared text field mapping: "standard"
// analyzer, term vectors included, not stored, not included in _all. The
// index's default analyzer is "standard" as well.
func (vt *VersusTest) makeIndexMapping() mapping.IndexMapping {
	textFM := bleve.NewTextFieldMapping()
	textFM.Analyzer = "standard"
	textFM.IncludeTermVectors = true
	textFM.IncludeInAll = false
	textFM.Store = false

	docMapping := bleve.NewDocumentMapping()
	for _, field := range []string{"title", "body"} {
		docMapping.AddFieldMappingsAt(field, textFM)
	}

	indexMapping := bleve.NewIndexMapping()
	indexMapping.DefaultAnalyzer = "standard"
	indexMapping.DefaultMapping = docMapping
	return indexMapping
}
// insertBodies indexes every entry of vt.Bodies into idx. Doc number i gets
// the decimal form of i as both its id and its "title", and its words joined
// by single spaces as its "body". The pending batch is executed whenever i is
// a multiple of vt.BatchSize (which includes i == 0) and once more after the
// last doc. Any error aborts the test.
func (vt *VersusTest) insertBodies(idx bleve.Index) {
	pending := idx.NewBatch()

	for docNum, words := range vt.Bodies {
		id := strconv.Itoa(docNum)
		fields := map[string]interface{}{
			"title": id,
			"body":  strings.Join(words, " "),
		}
		if err := pending.Index(id, fields); err != nil {
			vt.t.Fatalf("batch.Index err: %v", err)
		}

		if docNum%vt.BatchSize != 0 {
			continue
		}
		if err := idx.Batch(pending); err != nil {
			vt.t.Fatalf("batch err: %v", err)
		}
		pending.Reset()
	}

	// Execute whatever is still pending after the loop.
	if err := idx.Batch(pending); err != nil {
		vt.t.Fatalf("last batch err: %v", err)
	}
}
// genBodies returns vt.NumDocs generated bodies, one genBody result per doc.
// The result is nil when NumDocs is not positive.
func (vt *VersusTest) genBodies() (rv [][]string) {
	for remaining := vt.NumDocs; remaining > 0; remaining-- {
		rv = append(rv, vt.genBody())
	}
	return rv
}
// genBody returns one generated body: a random number of words in
// [0, vt.MaxWordsPerDoc), each picked at random from the vt.NumWords-word
// dictionary. Every call uses its own generator seeded from the current time
// in nanoseconds. The result is nil when zero words are drawn.
func (vt *VersusTest) genBody() (rv []string) {
	src := rand.NewSource(time.Now().UnixNano())
	rng := rand.New(src)

	for remaining := rng.Intn(vt.MaxWordsPerDoc); remaining > 0; remaining-- {
		rv = append(rv, vt.genWord(rng.Intn(vt.NumWords)))
	}
	return rv
}
// genWord returns dictionary word number i, which is i written in lowercase
// hexadecimal.
func (vt *VersusTest) genWord(i int) string {
	return strconv.FormatInt(int64(i), 16)
}
================================================
FILE: util/extract.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package util
import (
"math"
"reflect"
)
// ExtractNumericValFloat64 converts v to a float64 when v's underlying kind is
// a float, a signed integer or an unsigned integer (uintptr included). The
// boolean result is false, and the value 0, for everything else, including a
// nil interface.
func ExtractNumericValFloat64(v interface{}) (float64, bool) {
	rv := reflect.ValueOf(v)

	// The zero Value produced for a nil interface has kind Invalid and so
	// falls through to the final return.
	switch rv.Kind() {
	case reflect.Float32, reflect.Float64:
		return rv.Float(), true
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return float64(rv.Int()), true
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Uintptr:
		return float64(rv.Uint()), true
	}
	return 0, false
}
// ExtractNumericValFloat32 converts v to a float32 when v's underlying kind is
// a float, a signed integer or an unsigned integer (uintptr included). A float
// input is rejected when IsValidFloat32 reports false for it. The boolean
// result is false, and the value 0, for everything that cannot be converted,
// including a nil interface.
func ExtractNumericValFloat32(v interface{}) (float32, bool) {
	rv := reflect.ValueOf(v)

	// The zero Value produced for a nil interface has kind Invalid and so
	// falls through to the final return.
	switch rv.Kind() {
	case reflect.Float32, reflect.Float64:
		if f := rv.Float(); IsValidFloat32(f) {
			return float32(f), true
		}
	case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
		return float32(rv.Int()), true
	case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64,
		reflect.Uintptr:
		return float32(rv.Uint()), true
	}
	return 0, false
}
// IsValidFloat32 reports whether val is a number whose magnitude fits in a
// float32: it is false for NaN, for either infinity, and for any value whose
// absolute value exceeds math.MaxFloat32.
func IsValidFloat32(val float64) bool {
	if math.IsNaN(val) || math.IsInf(val, 0) {
		return false
	}
	// Compare the magnitude so that large negative values are rejected too;
	// converted to float32 they would overflow to -Inf.
	return math.Abs(val) <= math.MaxFloat32
}
================================================
FILE: util/json.go
================================================
// Copyright (c) 2023 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package util
import (
"encoding/json"
)
// MarshalJSON is the JSON encoding function, initialised to encoding/json's
// Marshal. Should only be overwritten during process init()'ialization.
var MarshalJSON = json.Marshal

// UnmarshalJSON is the JSON decoding function, initialised to encoding/json's
// Unmarshal. Should only be overwritten during process init()'ialization.
var UnmarshalJSON = json.Unmarshal
================================================
FILE: util/keys.go
================================================
// Copyright (c) 2025 Couchbase, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
package util
// Bolt keys made of a single byte.
var (
	BoltSnapshotsBucket = []byte{'s'}
	BoltPathKey         = []byte{'p'}
	BoltDeletedKey      = []byte{'d'}
	BoltInternalKey     = []byte{'i'}
	BoltMetaDataKey     = []byte{'m'}
)

// Bolt keys spelled out as words.
var (
	BoltMetaDataSegmentTypeKey    = []byte("type")
	BoltMetaDataSegmentVersionKey = []byte("version")
	BoltMetaDataTimeStamp         = []byte("timeStamp")
	BoltStatsKey                  = []byte("stats")
	BoltUpdatedFieldsKey          = []byte("fields")
)

// Keys declared alongside the Bolt keys that do not carry the Bolt prefix.
var (
	TotBytesWrittenKey = []byte("TotBytesWritten")
	MappingInternalKey = []byte("_mapping")
)